# sqlglot.parser
1from __future__ import annotations 2 3import itertools 4import logging 5import re 6import typing as t 7from collections import defaultdict 8 9from sqlglot import exp 10from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors 11from sqlglot.helper import apply_index_offset, ensure_list, seq_get 12from sqlglot.time import format_time 13from sqlglot.tokens import Token, Tokenizer, TokenType 14from sqlglot.trie import TrieResult, in_trie, new_trie 15 16if t.TYPE_CHECKING: 17 from sqlglot._typing import E, Lit 18 from sqlglot.dialects.dialect import Dialect, DialectType 19 20 T = t.TypeVar("T") 21 TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor) 22 23logger = logging.getLogger("sqlglot") 24 25OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]] 26 27# Used to detect alphabetical characters and +/- in timestamp literals 28TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]") 29 30 31def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 32 if len(args) == 1 and args[0].is_star: 33 return exp.StarMap(this=args[0]) 34 35 keys = [] 36 values = [] 37 for i in range(0, len(args), 2): 38 keys.append(args[i]) 39 values.append(args[i + 1]) 40 41 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False)) 42 43 44def build_like(args: t.List) -> exp.Escape | exp.Like: 45 like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0)) 46 return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like 47 48 49def binary_range_parser( 50 expr_type: t.Type[exp.Expression], reverse_args: bool = False 51) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 52 def _parse_binary_range( 53 self: Parser, this: t.Optional[exp.Expression] 54 ) -> t.Optional[exp.Expression]: 55 expression = self._parse_bitwise() 56 if reverse_args: 57 this, expression = expression, this 58 return self._parse_escape(self.expression(expr_type, 
this=this, expression=expression)) 59 60 return _parse_binary_range 61 62 63def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 64 # Default argument order is base, expression 65 this = seq_get(args, 0) 66 expression = seq_get(args, 1) 67 68 if expression: 69 if not dialect.LOG_BASE_FIRST: 70 this, expression = expression, this 71 return exp.Log(this=this, expression=expression) 72 73 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this) 74 75 76def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex: 77 arg = seq_get(args, 0) 78 return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg) 79 80 81def build_lower(args: t.List) -> exp.Lower | exp.Hex: 82 # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation 83 arg = seq_get(args, 0) 84 return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg) 85 86 87def build_upper(args: t.List) -> exp.Upper | exp.Hex: 88 # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation 89 arg = seq_get(args, 0) 90 return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg) 91 92 93def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 94 def _builder(args: t.List, dialect: Dialect) -> E: 95 expression = expr_type( 96 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 97 ) 98 if len(args) > 2 and expr_type is exp.JSONExtract: 99 expression.set("expressions", args[2:]) 100 101 return expression 102 103 return _builder 104 105 106def build_mod(args: t.List) -> exp.Mod: 107 this = seq_get(args, 0) 108 expression = seq_get(args, 1) 109 110 # Wrap the operands if they are binary nodes, e.g. 
MOD(a + 1, 7) -> (a + 1) % 7 111 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 112 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 113 114 return exp.Mod(this=this, expression=expression) 115 116 117def build_pad(args: t.List, is_left: bool = True): 118 return exp.Pad( 119 this=seq_get(args, 0), 120 expression=seq_get(args, 1), 121 fill_pattern=seq_get(args, 2), 122 is_left=is_left, 123 ) 124 125 126def build_array_constructor( 127 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 128) -> exp.Expression: 129 array_exp = exp_class(expressions=args) 130 131 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 132 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 133 134 return array_exp 135 136 137def build_convert_timezone( 138 args: t.List, default_source_tz: t.Optional[str] = None 139) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 140 if len(args) == 2: 141 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 142 return exp.ConvertTimezone( 143 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 144 ) 145 146 return exp.ConvertTimezone.from_arg_list(args) 147 148 149def build_trim(args: t.List, is_left: bool = True): 150 return exp.Trim( 151 this=seq_get(args, 0), 152 expression=seq_get(args, 1), 153 position="LEADING" if is_left else "TRAILING", 154 ) 155 156 157def build_coalesce( 158 args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None 159) -> exp.Coalesce: 160 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null) 161 162 163def build_locate_strposition(args: t.List): 164 return exp.StrPosition( 165 this=seq_get(args, 1), 166 substr=seq_get(args, 0), 167 position=seq_get(args, 2), 168 ) 169 170 171class _Parser(type): 172 def __new__(cls, clsname, bases, attrs): 173 klass = super().__new__(cls, clsname, 
bases, attrs) 174 175 klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS) 176 klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS) 177 178 return klass 179 180 181class Parser(metaclass=_Parser): 182 """ 183 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 184 185 Args: 186 error_level: The desired error level. 187 Default: ErrorLevel.IMMEDIATE 188 error_message_context: The amount of context to capture from a query string when displaying 189 the error message (in number of characters). 190 Default: 100 191 max_errors: Maximum number of error messages to include in a raised ParseError. 192 This is only relevant if error_level is ErrorLevel.RAISE. 193 Default: 3 194 """ 195 196 FUNCTIONS: t.Dict[str, t.Callable] = { 197 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 198 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 199 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 200 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 201 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 202 ), 203 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 204 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 205 ), 206 "CHAR": lambda args: exp.Chr(expressions=args), 207 "CHR": lambda args: exp.Chr(expressions=args), 208 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 209 "CONCAT": lambda args, dialect: exp.Concat( 210 expressions=args, 211 safe=not dialect.STRICT_STRING_CONCAT, 212 coalesce=dialect.CONCAT_COALESCE, 213 ), 214 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 215 expressions=args, 216 safe=not dialect.STRICT_STRING_CONCAT, 217 coalesce=dialect.CONCAT_COALESCE, 218 ), 219 "CONVERT_TIMEZONE": build_convert_timezone, 220 "DATE_TO_DATE_STR": lambda args: exp.Cast( 221 this=seq_get(args, 0), 222 
to=exp.DataType(this=exp.DataType.Type.TEXT), 223 ), 224 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 225 start=seq_get(args, 0), 226 end=seq_get(args, 1), 227 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 228 ), 229 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 230 "HEX": build_hex, 231 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 232 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 233 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 234 "LIKE": build_like, 235 "LOG": build_logarithm, 236 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 237 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 238 "LOWER": build_lower, 239 "LPAD": lambda args: build_pad(args), 240 "LEFTPAD": lambda args: build_pad(args), 241 "LTRIM": lambda args: build_trim(args), 242 "MOD": build_mod, 243 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 244 "RPAD": lambda args: build_pad(args, is_left=False), 245 "RTRIM": lambda args: build_trim(args, is_left=False), 246 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 247 if len(args) != 2 248 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 249 "STRPOS": exp.StrPosition.from_arg_list, 250 "CHARINDEX": lambda args: build_locate_strposition(args), 251 "INSTR": exp.StrPosition.from_arg_list, 252 "LOCATE": lambda args: build_locate_strposition(args), 253 "TIME_TO_TIME_STR": lambda args: exp.Cast( 254 this=seq_get(args, 0), 255 to=exp.DataType(this=exp.DataType.Type.TEXT), 256 ), 257 "TO_HEX": build_hex, 258 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 259 this=exp.Cast( 260 this=seq_get(args, 0), 261 to=exp.DataType(this=exp.DataType.Type.TEXT), 262 ), 263 start=exp.Literal.number(1), 264 length=exp.Literal.number(10), 265 ), 266 
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 267 "UPPER": build_upper, 268 "VAR_MAP": build_var_map, 269 } 270 271 NO_PAREN_FUNCTIONS = { 272 TokenType.CURRENT_DATE: exp.CurrentDate, 273 TokenType.CURRENT_DATETIME: exp.CurrentDate, 274 TokenType.CURRENT_TIME: exp.CurrentTime, 275 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 276 TokenType.CURRENT_USER: exp.CurrentUser, 277 } 278 279 STRUCT_TYPE_TOKENS = { 280 TokenType.NESTED, 281 TokenType.OBJECT, 282 TokenType.STRUCT, 283 TokenType.UNION, 284 } 285 286 NESTED_TYPE_TOKENS = { 287 TokenType.ARRAY, 288 TokenType.LIST, 289 TokenType.LOWCARDINALITY, 290 TokenType.MAP, 291 TokenType.NULLABLE, 292 TokenType.RANGE, 293 *STRUCT_TYPE_TOKENS, 294 } 295 296 ENUM_TYPE_TOKENS = { 297 TokenType.DYNAMIC, 298 TokenType.ENUM, 299 TokenType.ENUM8, 300 TokenType.ENUM16, 301 } 302 303 AGGREGATE_TYPE_TOKENS = { 304 TokenType.AGGREGATEFUNCTION, 305 TokenType.SIMPLEAGGREGATEFUNCTION, 306 } 307 308 TYPE_TOKENS = { 309 TokenType.BIT, 310 TokenType.BOOLEAN, 311 TokenType.TINYINT, 312 TokenType.UTINYINT, 313 TokenType.SMALLINT, 314 TokenType.USMALLINT, 315 TokenType.INT, 316 TokenType.UINT, 317 TokenType.BIGINT, 318 TokenType.UBIGINT, 319 TokenType.INT128, 320 TokenType.UINT128, 321 TokenType.INT256, 322 TokenType.UINT256, 323 TokenType.MEDIUMINT, 324 TokenType.UMEDIUMINT, 325 TokenType.FIXEDSTRING, 326 TokenType.FLOAT, 327 TokenType.DOUBLE, 328 TokenType.UDOUBLE, 329 TokenType.CHAR, 330 TokenType.NCHAR, 331 TokenType.VARCHAR, 332 TokenType.NVARCHAR, 333 TokenType.BPCHAR, 334 TokenType.TEXT, 335 TokenType.MEDIUMTEXT, 336 TokenType.LONGTEXT, 337 TokenType.BLOB, 338 TokenType.MEDIUMBLOB, 339 TokenType.LONGBLOB, 340 TokenType.BINARY, 341 TokenType.VARBINARY, 342 TokenType.JSON, 343 TokenType.JSONB, 344 TokenType.INTERVAL, 345 TokenType.TINYBLOB, 346 TokenType.TINYTEXT, 347 TokenType.TIME, 348 TokenType.TIMETZ, 349 TokenType.TIMESTAMP, 350 TokenType.TIMESTAMP_S, 351 TokenType.TIMESTAMP_MS, 352 
TokenType.TIMESTAMP_NS, 353 TokenType.TIMESTAMPTZ, 354 TokenType.TIMESTAMPLTZ, 355 TokenType.TIMESTAMPNTZ, 356 TokenType.DATETIME, 357 TokenType.DATETIME2, 358 TokenType.DATETIME64, 359 TokenType.SMALLDATETIME, 360 TokenType.DATE, 361 TokenType.DATE32, 362 TokenType.INT4RANGE, 363 TokenType.INT4MULTIRANGE, 364 TokenType.INT8RANGE, 365 TokenType.INT8MULTIRANGE, 366 TokenType.NUMRANGE, 367 TokenType.NUMMULTIRANGE, 368 TokenType.TSRANGE, 369 TokenType.TSMULTIRANGE, 370 TokenType.TSTZRANGE, 371 TokenType.TSTZMULTIRANGE, 372 TokenType.DATERANGE, 373 TokenType.DATEMULTIRANGE, 374 TokenType.DECIMAL, 375 TokenType.DECIMAL32, 376 TokenType.DECIMAL64, 377 TokenType.DECIMAL128, 378 TokenType.DECIMAL256, 379 TokenType.UDECIMAL, 380 TokenType.BIGDECIMAL, 381 TokenType.UUID, 382 TokenType.GEOGRAPHY, 383 TokenType.GEOGRAPHYPOINT, 384 TokenType.GEOMETRY, 385 TokenType.POINT, 386 TokenType.RING, 387 TokenType.LINESTRING, 388 TokenType.MULTILINESTRING, 389 TokenType.POLYGON, 390 TokenType.MULTIPOLYGON, 391 TokenType.HLLSKETCH, 392 TokenType.HSTORE, 393 TokenType.PSEUDO_TYPE, 394 TokenType.SUPER, 395 TokenType.SERIAL, 396 TokenType.SMALLSERIAL, 397 TokenType.BIGSERIAL, 398 TokenType.XML, 399 TokenType.YEAR, 400 TokenType.USERDEFINED, 401 TokenType.MONEY, 402 TokenType.SMALLMONEY, 403 TokenType.ROWVERSION, 404 TokenType.IMAGE, 405 TokenType.VARIANT, 406 TokenType.VECTOR, 407 TokenType.VOID, 408 TokenType.OBJECT, 409 TokenType.OBJECT_IDENTIFIER, 410 TokenType.INET, 411 TokenType.IPADDRESS, 412 TokenType.IPPREFIX, 413 TokenType.IPV4, 414 TokenType.IPV6, 415 TokenType.UNKNOWN, 416 TokenType.NOTHING, 417 TokenType.NULL, 418 TokenType.NAME, 419 TokenType.TDIGEST, 420 TokenType.DYNAMIC, 421 *ENUM_TYPE_TOKENS, 422 *NESTED_TYPE_TOKENS, 423 *AGGREGATE_TYPE_TOKENS, 424 } 425 426 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 427 TokenType.BIGINT: TokenType.UBIGINT, 428 TokenType.INT: TokenType.UINT, 429 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 430 TokenType.SMALLINT: TokenType.USMALLINT, 431 
TokenType.TINYINT: TokenType.UTINYINT, 432 TokenType.DECIMAL: TokenType.UDECIMAL, 433 TokenType.DOUBLE: TokenType.UDOUBLE, 434 } 435 436 SUBQUERY_PREDICATES = { 437 TokenType.ANY: exp.Any, 438 TokenType.ALL: exp.All, 439 TokenType.EXISTS: exp.Exists, 440 TokenType.SOME: exp.Any, 441 } 442 443 RESERVED_TOKENS = { 444 *Tokenizer.SINGLE_TOKENS.values(), 445 TokenType.SELECT, 446 } - {TokenType.IDENTIFIER} 447 448 DB_CREATABLES = { 449 TokenType.DATABASE, 450 TokenType.DICTIONARY, 451 TokenType.FILE_FORMAT, 452 TokenType.MODEL, 453 TokenType.NAMESPACE, 454 TokenType.SCHEMA, 455 TokenType.SEMANTIC_VIEW, 456 TokenType.SEQUENCE, 457 TokenType.SINK, 458 TokenType.SOURCE, 459 TokenType.STAGE, 460 TokenType.STORAGE_INTEGRATION, 461 TokenType.STREAMLIT, 462 TokenType.TABLE, 463 TokenType.TAG, 464 TokenType.VIEW, 465 TokenType.WAREHOUSE, 466 } 467 468 CREATABLES = { 469 TokenType.COLUMN, 470 TokenType.CONSTRAINT, 471 TokenType.FOREIGN_KEY, 472 TokenType.FUNCTION, 473 TokenType.INDEX, 474 TokenType.PROCEDURE, 475 *DB_CREATABLES, 476 } 477 478 ALTERABLES = { 479 TokenType.INDEX, 480 TokenType.TABLE, 481 TokenType.VIEW, 482 TokenType.SESSION, 483 } 484 485 # Tokens that can represent identifiers 486 ID_VAR_TOKENS = { 487 TokenType.ALL, 488 TokenType.ATTACH, 489 TokenType.VAR, 490 TokenType.ANTI, 491 TokenType.APPLY, 492 TokenType.ASC, 493 TokenType.ASOF, 494 TokenType.AUTO_INCREMENT, 495 TokenType.BEGIN, 496 TokenType.BPCHAR, 497 TokenType.CACHE, 498 TokenType.CASE, 499 TokenType.COLLATE, 500 TokenType.COMMAND, 501 TokenType.COMMENT, 502 TokenType.COMMIT, 503 TokenType.CONSTRAINT, 504 TokenType.COPY, 505 TokenType.CUBE, 506 TokenType.CURRENT_SCHEMA, 507 TokenType.DEFAULT, 508 TokenType.DELETE, 509 TokenType.DESC, 510 TokenType.DESCRIBE, 511 TokenType.DETACH, 512 TokenType.DICTIONARY, 513 TokenType.DIV, 514 TokenType.END, 515 TokenType.EXECUTE, 516 TokenType.EXPORT, 517 TokenType.ESCAPE, 518 TokenType.FALSE, 519 TokenType.FIRST, 520 TokenType.FILTER, 521 TokenType.FINAL, 522 
TokenType.FORMAT, 523 TokenType.FULL, 524 TokenType.GET, 525 TokenType.IDENTIFIER, 526 TokenType.IS, 527 TokenType.ISNULL, 528 TokenType.INTERVAL, 529 TokenType.KEEP, 530 TokenType.KILL, 531 TokenType.LEFT, 532 TokenType.LIMIT, 533 TokenType.LOAD, 534 TokenType.MERGE, 535 TokenType.NATURAL, 536 TokenType.NEXT, 537 TokenType.OFFSET, 538 TokenType.OPERATOR, 539 TokenType.ORDINALITY, 540 TokenType.OVERLAPS, 541 TokenType.OVERWRITE, 542 TokenType.PARTITION, 543 TokenType.PERCENT, 544 TokenType.PIVOT, 545 TokenType.PRAGMA, 546 TokenType.PUT, 547 TokenType.RANGE, 548 TokenType.RECURSIVE, 549 TokenType.REFERENCES, 550 TokenType.REFRESH, 551 TokenType.RENAME, 552 TokenType.REPLACE, 553 TokenType.RIGHT, 554 TokenType.ROLLUP, 555 TokenType.ROW, 556 TokenType.ROWS, 557 TokenType.SEMI, 558 TokenType.SET, 559 TokenType.SETTINGS, 560 TokenType.SHOW, 561 TokenType.TEMPORARY, 562 TokenType.TOP, 563 TokenType.TRUE, 564 TokenType.TRUNCATE, 565 TokenType.UNIQUE, 566 TokenType.UNNEST, 567 TokenType.UNPIVOT, 568 TokenType.UPDATE, 569 TokenType.USE, 570 TokenType.VOLATILE, 571 TokenType.WINDOW, 572 *CREATABLES, 573 *SUBQUERY_PREDICATES, 574 *TYPE_TOKENS, 575 *NO_PAREN_FUNCTIONS, 576 } 577 ID_VAR_TOKENS.remove(TokenType.UNION) 578 579 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 580 TokenType.ANTI, 581 TokenType.ASOF, 582 TokenType.FULL, 583 TokenType.LEFT, 584 TokenType.LOCK, 585 TokenType.NATURAL, 586 TokenType.RIGHT, 587 TokenType.SEMI, 588 TokenType.WINDOW, 589 } 590 591 ALIAS_TOKENS = ID_VAR_TOKENS 592 593 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 594 595 ARRAY_CONSTRUCTORS = { 596 "ARRAY": exp.Array, 597 "LIST": exp.List, 598 } 599 600 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 601 602 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 603 604 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 605 606 FUNC_TOKENS = { 607 TokenType.COLLATE, 608 TokenType.COMMAND, 609 TokenType.CURRENT_DATE, 610 TokenType.CURRENT_DATETIME, 611 TokenType.CURRENT_SCHEMA, 612 
TokenType.CURRENT_TIMESTAMP, 613 TokenType.CURRENT_TIME, 614 TokenType.CURRENT_USER, 615 TokenType.FILTER, 616 TokenType.FIRST, 617 TokenType.FORMAT, 618 TokenType.GET, 619 TokenType.GLOB, 620 TokenType.IDENTIFIER, 621 TokenType.INDEX, 622 TokenType.ISNULL, 623 TokenType.ILIKE, 624 TokenType.INSERT, 625 TokenType.LIKE, 626 TokenType.MERGE, 627 TokenType.NEXT, 628 TokenType.OFFSET, 629 TokenType.PRIMARY_KEY, 630 TokenType.RANGE, 631 TokenType.REPLACE, 632 TokenType.RLIKE, 633 TokenType.ROW, 634 TokenType.UNNEST, 635 TokenType.VAR, 636 TokenType.LEFT, 637 TokenType.RIGHT, 638 TokenType.SEQUENCE, 639 TokenType.DATE, 640 TokenType.DATETIME, 641 TokenType.TABLE, 642 TokenType.TIMESTAMP, 643 TokenType.TIMESTAMPTZ, 644 TokenType.TRUNCATE, 645 TokenType.UTC_DATE, 646 TokenType.UTC_TIME, 647 TokenType.UTC_TIMESTAMP, 648 TokenType.WINDOW, 649 TokenType.XOR, 650 *TYPE_TOKENS, 651 *SUBQUERY_PREDICATES, 652 } 653 654 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 655 TokenType.AND: exp.And, 656 } 657 658 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 659 TokenType.COLON_EQ: exp.PropertyEQ, 660 } 661 662 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 663 TokenType.OR: exp.Or, 664 } 665 666 EQUALITY = { 667 TokenType.EQ: exp.EQ, 668 TokenType.NEQ: exp.NEQ, 669 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 670 } 671 672 COMPARISON = { 673 TokenType.GT: exp.GT, 674 TokenType.GTE: exp.GTE, 675 TokenType.LT: exp.LT, 676 TokenType.LTE: exp.LTE, 677 } 678 679 BITWISE = { 680 TokenType.AMP: exp.BitwiseAnd, 681 TokenType.CARET: exp.BitwiseXor, 682 TokenType.PIPE: exp.BitwiseOr, 683 } 684 685 TERM = { 686 TokenType.DASH: exp.Sub, 687 TokenType.PLUS: exp.Add, 688 TokenType.MOD: exp.Mod, 689 TokenType.COLLATE: exp.Collate, 690 } 691 692 FACTOR = { 693 TokenType.DIV: exp.IntDiv, 694 TokenType.LR_ARROW: exp.Distance, 695 TokenType.SLASH: exp.Div, 696 TokenType.STAR: exp.Mul, 697 } 698 699 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 700 701 TIMES = { 
702 TokenType.TIME, 703 TokenType.TIMETZ, 704 } 705 706 TIMESTAMPS = { 707 TokenType.TIMESTAMP, 708 TokenType.TIMESTAMPNTZ, 709 TokenType.TIMESTAMPTZ, 710 TokenType.TIMESTAMPLTZ, 711 *TIMES, 712 } 713 714 SET_OPERATIONS = { 715 TokenType.UNION, 716 TokenType.INTERSECT, 717 TokenType.EXCEPT, 718 } 719 720 JOIN_METHODS = { 721 TokenType.ASOF, 722 TokenType.NATURAL, 723 TokenType.POSITIONAL, 724 } 725 726 JOIN_SIDES = { 727 TokenType.LEFT, 728 TokenType.RIGHT, 729 TokenType.FULL, 730 } 731 732 JOIN_KINDS = { 733 TokenType.ANTI, 734 TokenType.CROSS, 735 TokenType.INNER, 736 TokenType.OUTER, 737 TokenType.SEMI, 738 TokenType.STRAIGHT_JOIN, 739 } 740 741 JOIN_HINTS: t.Set[str] = set() 742 743 LAMBDAS = { 744 TokenType.ARROW: lambda self, expressions: self.expression( 745 exp.Lambda, 746 this=self._replace_lambda( 747 self._parse_assignment(), 748 expressions, 749 ), 750 expressions=expressions, 751 ), 752 TokenType.FARROW: lambda self, expressions: self.expression( 753 exp.Kwarg, 754 this=exp.var(expressions[0].name), 755 expression=self._parse_assignment(), 756 ), 757 } 758 759 COLUMN_OPERATORS = { 760 TokenType.DOT: None, 761 TokenType.DOTCOLON: lambda self, this, to: self.expression( 762 exp.JSONCast, 763 this=this, 764 to=to, 765 ), 766 TokenType.DCOLON: lambda self, this, to: self.build_cast( 767 strict=self.STRICT_CAST, this=this, to=to 768 ), 769 TokenType.ARROW: lambda self, this, path: self.expression( 770 exp.JSONExtract, 771 this=this, 772 expression=self.dialect.to_json_path(path), 773 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 774 ), 775 TokenType.DARROW: lambda self, this, path: self.expression( 776 exp.JSONExtractScalar, 777 this=this, 778 expression=self.dialect.to_json_path(path), 779 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 780 ), 781 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 782 exp.JSONBExtract, 783 this=this, 784 expression=path, 785 ), 786 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 787 
exp.JSONBExtractScalar, 788 this=this, 789 expression=path, 790 ), 791 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 792 exp.JSONBContains, 793 this=this, 794 expression=key, 795 ), 796 } 797 798 CAST_COLUMN_OPERATORS = { 799 TokenType.DOTCOLON, 800 TokenType.DCOLON, 801 } 802 803 EXPRESSION_PARSERS = { 804 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 805 exp.Column: lambda self: self._parse_column(), 806 exp.Condition: lambda self: self._parse_assignment(), 807 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 808 exp.Expression: lambda self: self._parse_expression(), 809 exp.From: lambda self: self._parse_from(joins=True), 810 exp.Group: lambda self: self._parse_group(), 811 exp.Having: lambda self: self._parse_having(), 812 exp.Hint: lambda self: self._parse_hint_body(), 813 exp.Identifier: lambda self: self._parse_id_var(), 814 exp.Join: lambda self: self._parse_join(), 815 exp.Lambda: lambda self: self._parse_lambda(), 816 exp.Lateral: lambda self: self._parse_lateral(), 817 exp.Limit: lambda self: self._parse_limit(), 818 exp.Offset: lambda self: self._parse_offset(), 819 exp.Order: lambda self: self._parse_order(), 820 exp.Ordered: lambda self: self._parse_ordered(), 821 exp.Properties: lambda self: self._parse_properties(), 822 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 823 exp.Qualify: lambda self: self._parse_qualify(), 824 exp.Returning: lambda self: self._parse_returning(), 825 exp.Select: lambda self: self._parse_select(), 826 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 827 exp.Table: lambda self: self._parse_table_parts(), 828 exp.TableAlias: lambda self: self._parse_table_alias(), 829 exp.Tuple: lambda self: self._parse_value(values=False), 830 exp.Whens: lambda self: self._parse_when_matched(), 831 exp.Where: lambda self: self._parse_where(), 832 exp.Window: lambda self: self._parse_named_window(), 833 exp.With: 
lambda self: self._parse_with(), 834 "JOIN_TYPE": lambda self: self._parse_join_parts(), 835 } 836 837 STATEMENT_PARSERS = { 838 TokenType.ALTER: lambda self: self._parse_alter(), 839 TokenType.ANALYZE: lambda self: self._parse_analyze(), 840 TokenType.BEGIN: lambda self: self._parse_transaction(), 841 TokenType.CACHE: lambda self: self._parse_cache(), 842 TokenType.COMMENT: lambda self: self._parse_comment(), 843 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 844 TokenType.COPY: lambda self: self._parse_copy(), 845 TokenType.CREATE: lambda self: self._parse_create(), 846 TokenType.DELETE: lambda self: self._parse_delete(), 847 TokenType.DESC: lambda self: self._parse_describe(), 848 TokenType.DESCRIBE: lambda self: self._parse_describe(), 849 TokenType.DROP: lambda self: self._parse_drop(), 850 TokenType.GRANT: lambda self: self._parse_grant(), 851 TokenType.REVOKE: lambda self: self._parse_revoke(), 852 TokenType.INSERT: lambda self: self._parse_insert(), 853 TokenType.KILL: lambda self: self._parse_kill(), 854 TokenType.LOAD: lambda self: self._parse_load(), 855 TokenType.MERGE: lambda self: self._parse_merge(), 856 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 857 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 858 TokenType.REFRESH: lambda self: self._parse_refresh(), 859 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 860 TokenType.SET: lambda self: self._parse_set(), 861 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 862 TokenType.UNCACHE: lambda self: self._parse_uncache(), 863 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 864 TokenType.UPDATE: lambda self: self._parse_update(), 865 TokenType.USE: lambda self: self._parse_use(), 866 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 867 } 868 869 UNARY_PARSERS = { 870 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 871 
TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 872 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 873 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 874 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 875 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 876 } 877 878 STRING_PARSERS = { 879 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 880 exp.RawString, this=token.text 881 ), 882 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 883 exp.National, this=token.text 884 ), 885 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 886 TokenType.STRING: lambda self, token: self.expression( 887 exp.Literal, this=token.text, is_string=True 888 ), 889 TokenType.UNICODE_STRING: lambda self, token: self.expression( 890 exp.UnicodeString, 891 this=token.text, 892 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 893 ), 894 } 895 896 NUMERIC_PARSERS = { 897 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 898 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 899 TokenType.HEX_STRING: lambda self, token: self.expression( 900 exp.HexString, 901 this=token.text, 902 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 903 ), 904 TokenType.NUMBER: lambda self, token: self.expression( 905 exp.Literal, this=token.text, is_string=False 906 ), 907 } 908 909 PRIMARY_PARSERS = { 910 **STRING_PARSERS, 911 **NUMERIC_PARSERS, 912 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 913 TokenType.NULL: lambda self, _: self.expression(exp.Null), 914 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 915 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 916 
TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 917 TokenType.STAR: lambda self, _: self._parse_star_ops(), 918 } 919 920 PLACEHOLDER_PARSERS = { 921 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 922 TokenType.PARAMETER: lambda self: self._parse_parameter(), 923 TokenType.COLON: lambda self: ( 924 self.expression(exp.Placeholder, this=self._prev.text) 925 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 926 else None 927 ), 928 } 929 930 RANGE_PARSERS = { 931 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 932 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 933 TokenType.GLOB: binary_range_parser(exp.Glob), 934 TokenType.ILIKE: binary_range_parser(exp.ILike), 935 TokenType.IN: lambda self, this: self._parse_in(this), 936 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 937 TokenType.IS: lambda self, this: self._parse_is(this), 938 TokenType.LIKE: binary_range_parser(exp.Like), 939 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 940 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 941 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 942 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 943 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 944 } 945 946 PIPE_SYNTAX_TRANSFORM_PARSERS = { 947 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 948 "AS": lambda self, query: self._build_pipe_cte( 949 query, [exp.Star()], self._parse_table_alias() 950 ), 951 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 952 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 953 "ORDER BY": lambda self, query: query.order_by( 954 self._parse_order(), append=False, copy=False 955 ), 956 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 957 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 958 "TABLESAMPLE": lambda self, query: 
self._parse_pipe_syntax_tablesample(query), 959 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 960 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 961 } 962 963 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 964 "ALLOWED_VALUES": lambda self: self.expression( 965 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 966 ), 967 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 968 "AUTO": lambda self: self._parse_auto_property(), 969 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 970 "BACKUP": lambda self: self.expression( 971 exp.BackupProperty, this=self._parse_var(any_token=True) 972 ), 973 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 974 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 975 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 976 "CHECKSUM": lambda self: self._parse_checksum(), 977 "CLUSTER BY": lambda self: self._parse_cluster(), 978 "CLUSTERED": lambda self: self._parse_clustered_by(), 979 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 980 exp.CollateProperty, **kwargs 981 ), 982 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 983 "CONTAINS": lambda self: self._parse_contains_property(), 984 "COPY": lambda self: self._parse_copy_property(), 985 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 986 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 987 "DEFINER": lambda self: self._parse_definer(), 988 "DETERMINISTIC": lambda self: self.expression( 989 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 990 ), 991 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 992 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 993 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 
994 "DISTKEY": lambda self: self._parse_distkey(), 995 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 996 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 997 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 998 "ENVIRONMENT": lambda self: self.expression( 999 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1000 ), 1001 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1002 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1003 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1004 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1005 "FREESPACE": lambda self: self._parse_freespace(), 1006 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1007 "HEAP": lambda self: self.expression(exp.HeapProperty), 1008 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1009 "IMMUTABLE": lambda self: self.expression( 1010 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1011 ), 1012 "INHERITS": lambda self: self.expression( 1013 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1014 ), 1015 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1016 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1017 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1018 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1019 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1020 "LIKE": lambda self: self._parse_create_like(), 1021 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1022 "LOCK": lambda self: self._parse_locking(), 1023 "LOCKING": lambda self: self._parse_locking(), 1024 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1025 "MATERIALIZED": lambda self: 
self.expression(exp.MaterializedProperty), 1026 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1027 "MODIFIES": lambda self: self._parse_modifies_property(), 1028 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1029 "NO": lambda self: self._parse_no_property(), 1030 "ON": lambda self: self._parse_on_property(), 1031 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1032 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1033 "PARTITION": lambda self: self._parse_partitioned_of(), 1034 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1035 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1036 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1037 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1038 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1039 "READS": lambda self: self._parse_reads_property(), 1040 "REMOTE": lambda self: self._parse_remote_with_connection(), 1041 "RETURNS": lambda self: self._parse_returns(), 1042 "STRICT": lambda self: self.expression(exp.StrictProperty), 1043 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1044 "ROW": lambda self: self._parse_row(), 1045 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1046 "SAMPLE": lambda self: self.expression( 1047 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1048 ), 1049 "SECURE": lambda self: self.expression(exp.SecureProperty), 1050 "SECURITY": lambda self: self._parse_security(), 1051 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1052 "SETTINGS": lambda self: self._parse_settings_property(), 1053 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1054 "SORTKEY": lambda self: self._parse_sortkey(), 1055 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1056 "STABLE": lambda 
self: self.expression( 1057 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1058 ), 1059 "STORED": lambda self: self._parse_stored(), 1060 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1061 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1062 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1063 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1064 "TO": lambda self: self._parse_to_table(), 1065 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1066 "TRANSFORM": lambda self: self.expression( 1067 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1068 ), 1069 "TTL": lambda self: self._parse_ttl(), 1070 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1071 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1072 "VOLATILE": lambda self: self._parse_volatile_property(), 1073 "WITH": lambda self: self._parse_with_property(), 1074 } 1075 1076 CONSTRAINT_PARSERS = { 1077 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1078 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1079 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1080 "CHARACTER SET": lambda self: self.expression( 1081 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1082 ), 1083 "CHECK": lambda self: self.expression( 1084 exp.CheckColumnConstraint, 1085 this=self._parse_wrapped(self._parse_assignment), 1086 enforced=self._match_text_seq("ENFORCED"), 1087 ), 1088 "COLLATE": lambda self: self.expression( 1089 exp.CollateColumnConstraint, 1090 this=self._parse_identifier() or self._parse_column(), 1091 ), 1092 "COMMENT": lambda self: self.expression( 1093 exp.CommentColumnConstraint, this=self._parse_string() 1094 ), 1095 "COMPRESS": lambda self: self._parse_compress(), 1096 "CLUSTERED": lambda self: self.expression( 1097 exp.ClusteredColumnConstraint, 
this=self._parse_wrapped_csv(self._parse_ordered) 1098 ), 1099 "NONCLUSTERED": lambda self: self.expression( 1100 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1101 ), 1102 "DEFAULT": lambda self: self.expression( 1103 exp.DefaultColumnConstraint, this=self._parse_bitwise() 1104 ), 1105 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1106 "EPHEMERAL": lambda self: self.expression( 1107 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1108 ), 1109 "EXCLUDE": lambda self: self.expression( 1110 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1111 ), 1112 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1113 "FORMAT": lambda self: self.expression( 1114 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1115 ), 1116 "GENERATED": lambda self: self._parse_generated_as_identity(), 1117 "IDENTITY": lambda self: self._parse_auto_increment(), 1118 "INLINE": lambda self: self._parse_inline(), 1119 "LIKE": lambda self: self._parse_create_like(), 1120 "NOT": lambda self: self._parse_not_constraint(), 1121 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1122 "ON": lambda self: ( 1123 self._match(TokenType.UPDATE) 1124 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1125 ) 1126 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1127 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1128 "PERIOD": lambda self: self._parse_period_for_system_time(), 1129 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1130 "REFERENCES": lambda self: self._parse_references(match=False), 1131 "TITLE": lambda self: self.expression( 1132 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1133 ), 1134 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1135 "UNIQUE": lambda self: self._parse_unique(), 1136 
    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        """Parse an Iceberg-style BUCKET(...) / TRUNCATE(...) partition transform.

        Assumes the BUCKET or TRUNCATE keyword was just consumed, i.e. it is
        `self._prev`. Returns None (after retreating one token) when the keyword
        is not followed by a parenthesized argument list, so the caller can
        re-parse the keyword as a plain identifier instead.
        """
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        # Each wrapped argument is either a literal (e.g. the bucket count /
        # truncate length) or a column reference.
        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)
lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1236 }, 1237 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1238 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1239 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1240 "DECODE": lambda self: self._parse_decode(), 1241 "EXTRACT": lambda self: self._parse_extract(), 1242 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1243 "GAP_FILL": lambda self: self._parse_gap_fill(), 1244 "JSON_OBJECT": lambda self: self._parse_json_object(), 1245 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1246 "JSON_TABLE": lambda self: self._parse_json_table(), 1247 "MATCH": lambda self: self._parse_match_against(), 1248 "NORMALIZE": lambda self: self._parse_normalize(), 1249 "OPENJSON": lambda self: self._parse_open_json(), 1250 "OVERLAY": lambda self: self._parse_overlay(), 1251 "POSITION": lambda self: self._parse_position(), 1252 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1253 "STRING_AGG": lambda self: self._parse_string_agg(), 1254 "SUBSTRING": lambda self: self._parse_substring(), 1255 "TRIM": lambda self: self._parse_trim(), 1256 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1257 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1258 "XMLELEMENT": lambda self: self.expression( 1259 exp.XMLElement, 1260 this=self._match_text_seq("NAME") and self._parse_id_var(), 1261 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1262 ), 1263 "XMLTABLE": lambda self: self._parse_xml_table(), 1264 } 1265 1266 QUERY_MODIFIER_PARSERS = { 1267 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1268 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1269 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1270 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1271 TokenType.HAVING: lambda self: 
("having", self._parse_having()), 1272 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1273 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1274 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1275 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1276 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1277 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1278 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1279 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1280 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1281 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1282 TokenType.CLUSTER_BY: lambda self: ( 1283 "cluster", 1284 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1285 ), 1286 TokenType.DISTRIBUTE_BY: lambda self: ( 1287 "distribute", 1288 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1289 ), 1290 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1291 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1292 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1293 } 1294 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1295 1296 SET_PARSERS = { 1297 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1298 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1299 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1300 "TRANSACTION": lambda self: self._parse_set_transaction(), 1301 } 1302 1303 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1304 1305 TYPE_LITERAL_PARSERS = { 1306 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1307 } 1308 1309 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {} 1310 1311 DDL_SELECT_TOKENS = 
{TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1312 1313 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1314 1315 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1316 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1317 "ISOLATION": ( 1318 ("LEVEL", "REPEATABLE", "READ"), 1319 ("LEVEL", "READ", "COMMITTED"), 1320 ("LEVEL", "READ", "UNCOMITTED"), 1321 ("LEVEL", "SERIALIZABLE"), 1322 ), 1323 "READ": ("WRITE", "ONLY"), 1324 } 1325 1326 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1327 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1328 ) 1329 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1330 1331 CREATE_SEQUENCE: OPTIONS_TYPE = { 1332 "SCALE": ("EXTEND", "NOEXTEND"), 1333 "SHARD": ("EXTEND", "NOEXTEND"), 1334 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1335 **dict.fromkeys( 1336 ( 1337 "SESSION", 1338 "GLOBAL", 1339 "KEEP", 1340 "NOKEEP", 1341 "ORDER", 1342 "NOORDER", 1343 "NOCACHE", 1344 "CYCLE", 1345 "NOCYCLE", 1346 "NOMINVALUE", 1347 "NOMAXVALUE", 1348 "NOSCALE", 1349 "NOSHARD", 1350 ), 1351 tuple(), 1352 ), 1353 } 1354 1355 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1356 1357 USABLES: OPTIONS_TYPE = dict.fromkeys( 1358 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1359 ) 1360 1361 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1362 1363 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1364 "TYPE": ("EVOLUTION",), 1365 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1366 } 1367 1368 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1369 1370 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1371 1372 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1373 "NOT": ("ENFORCED",), 1374 "MATCH": ( 1375 "FULL", 1376 "PARTIAL", 1377 "SIMPLE", 1378 ), 1379 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1380 "USING": ( 1381 "BTREE", 1382 "HASH", 1383 ), 1384 **dict.fromkeys(("DEFERRABLE", "NORELY", 
"RELY"), tuple()), 1385 } 1386 1387 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1388 "NO": ("OTHERS",), 1389 "CURRENT": ("ROW",), 1390 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1391 } 1392 1393 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1394 1395 CLONE_KEYWORDS = {"CLONE", "COPY"} 1396 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1397 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1398 1399 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1400 1401 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1402 1403 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1404 1405 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1406 1407 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1408 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1409 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1410 1411 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1412 1413 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1414 1415 ADD_CONSTRAINT_TOKENS = { 1416 TokenType.CONSTRAINT, 1417 TokenType.FOREIGN_KEY, 1418 TokenType.INDEX, 1419 TokenType.KEY, 1420 TokenType.PRIMARY_KEY, 1421 TokenType.UNIQUE, 1422 } 1423 1424 DISTINCT_TOKENS = {TokenType.DISTINCT} 1425 1426 NULL_TOKENS = {TokenType.NULL} 1427 1428 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1429 1430 SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1431 1432 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1433 1434 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1435 1436 ODBC_DATETIME_LITERALS = { 1437 "d": exp.Date, 1438 "t": exp.Time, 1439 "ts": exp.Timestamp, 1440 } 1441 1442 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1443 1444 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1445 
1446 # The style options for the DESCRIBE statement 1447 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1448 1449 # The style options for the ANALYZE statement 1450 ANALYZE_STYLES = { 1451 "BUFFER_USAGE_LIMIT", 1452 "FULL", 1453 "LOCAL", 1454 "NO_WRITE_TO_BINLOG", 1455 "SAMPLE", 1456 "SKIP_LOCKED", 1457 "VERBOSE", 1458 } 1459 1460 ANALYZE_EXPRESSION_PARSERS = { 1461 "ALL": lambda self: self._parse_analyze_columns(), 1462 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1463 "DELETE": lambda self: self._parse_analyze_delete(), 1464 "DROP": lambda self: self._parse_analyze_histogram(), 1465 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1466 "LIST": lambda self: self._parse_analyze_list(), 1467 "PREDICATE": lambda self: self._parse_analyze_columns(), 1468 "UPDATE": lambda self: self._parse_analyze_histogram(), 1469 "VALIDATE": lambda self: self._parse_analyze_validate(), 1470 } 1471 1472 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1473 1474 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1475 1476 OPERATION_MODIFIERS: t.Set[str] = set() 1477 1478 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1479 1480 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1481 1482 STRICT_CAST = True 1483 1484 PREFIXED_PIVOT_COLUMNS = False 1485 IDENTIFY_PIVOT_STRINGS = False 1486 1487 LOG_DEFAULTS_TO_LN = False 1488 1489 # Whether the table sample clause expects CSV syntax 1490 TABLESAMPLE_CSV = False 1491 1492 # The default method used for table sampling 1493 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1494 1495 # Whether the SET command needs a delimiter (e.g. 
"=") for assignments 1496 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1497 1498 # Whether the TRIM function expects the characters to trim as its first argument 1499 TRIM_PATTERN_FIRST = False 1500 1501 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1502 STRING_ALIASES = False 1503 1504 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1505 MODIFIERS_ATTACHED_TO_SET_OP = True 1506 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1507 1508 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1509 NO_PAREN_IF_COMMANDS = True 1510 1511 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1512 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1513 1514 # Whether the `:` operator is used to extract a value from a VARIANT column 1515 COLON_IS_VARIANT_EXTRACT = False 1516 1517 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1518 # If this is True and '(' is not found, the keyword will be treated as an identifier 1519 VALUES_FOLLOWED_BY_PAREN = True 1520 1521 # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift) 1522 SUPPORTS_IMPLICIT_UNNEST = False 1523 1524 # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS 1525 INTERVAL_SPANS = True 1526 1527 # Whether a PARTITION clause can follow a table reference 1528 SUPPORTS_PARTITION_SELECTION = False 1529 1530 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1531 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1532 1533 # Whether the 'AS' keyword is optional in the CTE definition syntax 1534 OPTIONAL_ALIAS_TOKEN_CTE = True 1535 1536 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1537 ALTER_RENAME_REQUIRES_COLUMN = True 1538 1539 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 
1540 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1541 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1542 # as BigQuery, where all joins have the same precedence. 1543 JOINS_HAVE_EQUAL_PRECEDENCE = False 1544 1545 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1546 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1547 1548 # Whether map literals support arbitrary expressions as keys. 1549 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1550 # When False, keys are typically restricted to identifiers. 1551 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1552 1553 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this 1554 # is true for Snowflake but not for BigQuery which can also process strings 1555 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1556 1557 __slots__ = ( 1558 "error_level", 1559 "error_message_context", 1560 "max_errors", 1561 "dialect", 1562 "sql", 1563 "errors", 1564 "_tokens", 1565 "_index", 1566 "_curr", 1567 "_next", 1568 "_prev", 1569 "_prev_comments", 1570 "_pipe_cte_counter", 1571 ) 1572 1573 # Autofilled 1574 SHOW_TRIE: t.Dict = {} 1575 SET_TRIE: t.Dict = {} 1576 1577 def __init__( 1578 self, 1579 error_level: t.Optional[ErrorLevel] = None, 1580 error_message_context: int = 100, 1581 max_errors: int = 3, 1582 dialect: DialectType = None, 1583 ): 1584 from sqlglot.dialects import Dialect 1585 1586 self.error_level = error_level or ErrorLevel.IMMEDIATE 1587 self.error_message_context = error_message_context 1588 self.max_errors = max_errors 1589 self.dialect = Dialect.get_or_raise(dialect) 1590 self.reset() 1591 1592 def reset(self): 1593 self.sql = "" 1594 self.errors = [] 1595 self._tokens = [] 1596 self._index = 0 1597 self._curr = None 1598 self._next = None 1599 self._prev = None 1600 self._prev_comments = None 1601 self._pipe_cte_counter = 0 1602 1603 def 
parse( 1604 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1605 ) -> t.List[t.Optional[exp.Expression]]: 1606 """ 1607 Parses a list of tokens and returns a list of syntax trees, one tree 1608 per parsed SQL statement. 1609 1610 Args: 1611 raw_tokens: The list of tokens. 1612 sql: The original SQL string, used to produce helpful debug messages. 1613 1614 Returns: 1615 The list of the produced syntax trees. 1616 """ 1617 return self._parse( 1618 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1619 ) 1620 1621 def parse_into( 1622 self, 1623 expression_types: exp.IntoType, 1624 raw_tokens: t.List[Token], 1625 sql: t.Optional[str] = None, 1626 ) -> t.List[t.Optional[exp.Expression]]: 1627 """ 1628 Parses a list of tokens into a given Expression type. If a collection of Expression 1629 types is given instead, this method will try to parse the token list into each one 1630 of them, stopping at the first for which the parsing succeeds. 1631 1632 Args: 1633 expression_types: The expression type(s) to try and parse the token list into. 1634 raw_tokens: The list of tokens. 1635 sql: The original SQL string, used to produce helpful debug messages. 1636 1637 Returns: 1638 The target Expression. 
1639 """ 1640 errors = [] 1641 for expression_type in ensure_list(expression_types): 1642 parser = self.EXPRESSION_PARSERS.get(expression_type) 1643 if not parser: 1644 raise TypeError(f"No parser registered for {expression_type}") 1645 1646 try: 1647 return self._parse(parser, raw_tokens, sql) 1648 except ParseError as e: 1649 e.errors[0]["into_expression"] = expression_type 1650 errors.append(e) 1651 1652 raise ParseError( 1653 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1654 errors=merge_errors(errors), 1655 ) from errors[-1] 1656 1657 def _parse( 1658 self, 1659 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1660 raw_tokens: t.List[Token], 1661 sql: t.Optional[str] = None, 1662 ) -> t.List[t.Optional[exp.Expression]]: 1663 self.reset() 1664 self.sql = sql or "" 1665 1666 total = len(raw_tokens) 1667 chunks: t.List[t.List[Token]] = [[]] 1668 1669 for i, token in enumerate(raw_tokens): 1670 if token.token_type == TokenType.SEMICOLON: 1671 if token.comments: 1672 chunks.append([token]) 1673 1674 if i < total - 1: 1675 chunks.append([]) 1676 else: 1677 chunks[-1].append(token) 1678 1679 expressions = [] 1680 1681 for tokens in chunks: 1682 self._index = -1 1683 self._tokens = tokens 1684 self._advance() 1685 1686 expressions.append(parse_method(self)) 1687 1688 if self._index < len(self._tokens): 1689 self.raise_error("Invalid expression / Unexpected token") 1690 1691 self.check_errors() 1692 1693 return expressions 1694 1695 def check_errors(self) -> None: 1696 """Logs or raises any found errors, depending on the chosen error level setting.""" 1697 if self.error_level == ErrorLevel.WARN: 1698 for error in self.errors: 1699 logger.error(str(error)) 1700 elif self.error_level == ErrorLevel.RAISE and self.errors: 1701 raise ParseError( 1702 concat_messages(self.errors, self.max_errors), 1703 errors=merge_errors(self.errors), 1704 ) 1705 1706 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1707 """ 
1708 Appends an error in the list of recorded errors or raises it, depending on the chosen 1709 error level setting. 1710 """ 1711 token = token or self._curr or self._prev or Token.string("") 1712 start = token.start 1713 end = token.end + 1 1714 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1715 highlight = self.sql[start:end] 1716 end_context = self.sql[end : end + self.error_message_context] 1717 1718 error = ParseError.new( 1719 f"{message}. Line {token.line}, Col: {token.col}.\n" 1720 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1721 description=message, 1722 line=token.line, 1723 col=token.col, 1724 start_context=start_context, 1725 highlight=highlight, 1726 end_context=end_context, 1727 ) 1728 1729 if self.error_level == ErrorLevel.IMMEDIATE: 1730 raise error 1731 1732 self.errors.append(error) 1733 1734 def expression( 1735 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1736 ) -> E: 1737 """ 1738 Creates a new, validated Expression. 1739 1740 Args: 1741 exp_class: The expression class to instantiate. 1742 comments: An optional list of comments to attach to the expression. 1743 kwargs: The arguments to set for the expression along with their respective values. 1744 1745 Returns: 1746 The target expression. 1747 """ 1748 instance = exp_class(**kwargs) 1749 instance.add_comments(comments) if comments else self._add_comments(instance) 1750 return self.validate_expression(instance) 1751 1752 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1753 if expression and self._prev_comments: 1754 expression.add_comments(self._prev_comments) 1755 self._prev_comments = None 1756 1757 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1758 """ 1759 Validates an Expression, making sure that all its mandatory arguments are set. 1760 1761 Args: 1762 expression: The expression to validate. 
1763 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1764 1765 Returns: 1766 The validated expression. 1767 """ 1768 if self.error_level != ErrorLevel.IGNORE: 1769 for error_message in expression.error_messages(args): 1770 self.raise_error(error_message) 1771 1772 return expression 1773 1774 def _find_sql(self, start: Token, end: Token) -> str: 1775 return self.sql[start.start : end.end + 1] 1776 1777 def _is_connected(self) -> bool: 1778 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1779 1780 def _advance(self, times: int = 1) -> None: 1781 self._index += times 1782 self._curr = seq_get(self._tokens, self._index) 1783 self._next = seq_get(self._tokens, self._index + 1) 1784 1785 if self._index > 0: 1786 self._prev = self._tokens[self._index - 1] 1787 self._prev_comments = self._prev.comments 1788 else: 1789 self._prev = None 1790 self._prev_comments = None 1791 1792 def _retreat(self, index: int) -> None: 1793 if index != self._index: 1794 self._advance(index - self._index) 1795 1796 def _warn_unsupported(self) -> None: 1797 if len(self._tokens) <= 1: 1798 return 1799 1800 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1801 # interested in emitting a warning for the one being currently processed. 1802 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1803 1804 logger.warning( 1805 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1806 ) 1807 1808 def _parse_command(self) -> exp.Command: 1809 self._warn_unsupported() 1810 return self.expression( 1811 exp.Command, 1812 comments=self._prev_comments, 1813 this=self._prev.text.upper(), 1814 expression=self._parse_string(), 1815 ) 1816 1817 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1818 """ 1819 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly
        """
        index = self._index
        error_level = self.error_level

        # Force IMMEDIATE so any failure inside parse_method surfaces as a ParseError here.
        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            # On failure (or when explicitly requested) restore the token cursor;
            # the caller's error level is always restored.
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        # COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <object> IS '<text>'.
        # Unknown object kinds fall back to a raw Command node.
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            # One TTL action: <expr> [DELETE | RECOMPRESS <expr> | TO DISK '<x>' | TO VOLUME '<x>'].
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        # Entry point for a single statement: dispatch on registered statement keywords,
        # then tokenizer-level commands, and finally fall back to an expression / SELECT.
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return self._parse_query_modifiers(expression)

    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
        # DROP [TEMPORARY] [MATERIALIZED] <kind> ...; unknown kinds become a raw Command.
        start = self._prev
        temporary = self._match(TokenType.TEMPORARY)
        materialized = self._match_text_seq("MATERIALIZED")

        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
        if not kind:
            return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        # Matches IF [NOT] EXISTS; truthy only when the full sequence matched.
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE statement (tables, views, functions, indexes, sequences, ...)."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different clause positions into one node.
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Merge any scattered SequenceProperties nodes into a single one,
                    # pooling their "options" lists.
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

        shallow = self._match_text_seq("SHALLOW")

        if self._match_texts(self.CLONE_KEYWORDS):
            copy = self._prev.text.lower() == "copy"
            clone = self.expression(
                exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
            )

        # Anything left over (other than a closing paren/comma from an enclosing
        # construct) means we couldn't fully parse the statement — fall back.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        # Parse CREATE SEQUENCE options; returns None if no tokens were consumed.
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # No tokens consumed means there were no sequence properties at all.
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward modifier flags that actually matched; a TypeError means
                # the parser doesn't accept one of them, i.e. an invalid combination.
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # ( prop, prop, ... )
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        # One table/statement property: keyword-dispatched parsers first, then a few
        # special forms, then sequence properties, and finally a generic key = value.
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        # Hive: STORED BY 'handler' | STORED AS [INPUTFORMAT '...' OUTPUTFORMAT '...' | format]
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers become plain variables; quoted ones are preserved.
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        # Greedily parse consecutive properties; None when nothing matched.
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        # Disambiguate Teradata's VOLATILE table modifier from the function-stability
        # keyword by looking at the token two positions back.
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        # TSQL SYSTEM_VERSIONING = ON [( HISTORY_TABLE = ..., ... )] | OFF
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        self._match(TokenType.EQ)
        # ON unless an explicit OFF follows.
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        # DISTRIBUTED BY HASH (cols) | BY RANDOM, optionally followed by BUCKETS.
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        # Dispatch for the many WITH ... forms: system versioning, wrapped property
        # lists, journaling, view attributes, (NO) DATA, serde properties, schema
        # binding, procedure options, and isolated loading.
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        # DEFINER = user@host
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        # CHECKSUM = ON | OFF | DEFAULT (on is None when neither ON nor OFF matched).
        self._match(TokenType.EQ)

        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        # CLUSTERED BY (cols) [SORTED BY (ordered-cols)] INTO <n> BUCKETS
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        # Only COPY GRANTS is a property; otherwise un-consume the COPY token.
        if not self._match_text_seq("GRANTS"):
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        # [NO] [CONCURRENT] ISOLATED LOADING [FOR ...]; backtrack fully on no match.
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        # Teradata LOCKING <kind> [<object>] {FOR | IN} <lock type> [OVERRIDE]
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # ROW locks name no object; the other kinds are followed by a table reference.
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        # Postgres partition bounds: IN (...), FROM (...) TO (...), or
        # WITH (MODULUS n, REMAINDER m). Raises if none of the forms match.
        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        # PARTITION OF parent { DEFAULT | FOR VALUES <bound spec> }
        if not self._match_text_seq("OF"):
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        # LIKE other_table [INCLUDING ... | EXCLUDING ...]*
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        # UDF RETURNS clause: TABLE<...>, TABLE (schema), NULL ON NULL INPUT, or a type.
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # A dot right after the style word means it was really part of a dotted
            # object name — retreat past both the DOT and the style token.
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
partition=partition,
            format=format,
        )

    # Parses Oracle-style multi-table INSERT (INSERT ALL / INSERT FIRST ...),
    # i.e. a sequence of conditional [WHEN cond THEN] [ELSE] INTO clauses
    # followed by the source query.
    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                # No INTO means no more conditional insert branches.
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    # Parses INSERT statements, including INSERT OVERWRITE, INSERT ... DIRECTORY
    # (Hive), multi-table inserts (FIRST/ALL), INSERT OR <alternative> (SQLite),
    # and the various trailing clauses (ON CONFLICT, RETURNING, etc.).
    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            # Hive: INSERT [OVERWRITE] [LOCAL] DIRECTORY '<path>' [ROW FORMAT ...]
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                # INSERT FIRST / INSERT ALL -> multi-table insert form.
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. SQLite's INSERT OR REPLACE/IGNORE/...
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and \
self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            # Target is either a table (optionally with a PARTITION spec) or,
            # e.g. for ClickHouse, a table function.
            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # RETURNING may appear either before or after the source expression.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    # Parses MySQL's KILL [CONNECTION | QUERY] <id> statement.
    def _parse_kill(self) -> exp.Kill:
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    # Parses ON CONFLICT (Postgres/SQLite) or ON DUPLICATE KEY (MySQL)
    # clauses of an INSERT.
    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            # Conflict target: ON CONSTRAINT <name> or a (col, ...) key list.
            if self._match_text_seq("ON",
"CONSTRAINT"): 2989 constraint = self._parse_id_var() 2990 elif self._match(TokenType.L_PAREN): 2991 conflict_keys = self._parse_csv(self._parse_id_var) 2992 self._match_r_paren() 2993 2994 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2995 if self._prev.token_type == TokenType.UPDATE: 2996 self._match(TokenType.SET) 2997 expressions = self._parse_csv(self._parse_equality) 2998 else: 2999 expressions = None 3000 3001 return self.expression( 3002 exp.OnConflict, 3003 duplicate=duplicate, 3004 expressions=expressions, 3005 action=action, 3006 conflict_keys=conflict_keys, 3007 constraint=constraint, 3008 where=self._parse_where(), 3009 ) 3010 3011 def _parse_returning(self) -> t.Optional[exp.Returning]: 3012 if not self._match(TokenType.RETURNING): 3013 return None 3014 return self.expression( 3015 exp.Returning, 3016 expressions=self._parse_csv(self._parse_expression), 3017 into=self._match(TokenType.INTO) and self._parse_table_part(), 3018 ) 3019 3020 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3021 if not self._match(TokenType.FORMAT): 3022 return None 3023 return self._parse_row_format() 3024 3025 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3026 index = self._index 3027 with_ = with_ or self._match_text_seq("WITH") 3028 3029 if not self._match(TokenType.SERDE_PROPERTIES): 3030 self._retreat(index) 3031 return None 3032 return self.expression( 3033 exp.SerdeProperties, 3034 **{ # type: ignore 3035 "expressions": self._parse_wrapped_properties(), 3036 "with": with_, 3037 }, 3038 ) 3039 3040 def _parse_row_format( 3041 self, match_row: bool = False 3042 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3043 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3044 return None 3045 3046 if self._match_text_seq("SERDE"): 3047 this = self._parse_string() 3048 3049 serde_properties = 
self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        # Each clause is optional; collect only the ones present.
        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
        if self._match_text_seq("ESCAPED", "BY"):
            kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    # Parses Hive's LOAD DATA [LOCAL] INPATH '<path>' [OVERWRITE] INTO TABLE ...;
    # anything else after LOAD falls back to an opaque Command.
    def _parse_load(self) -> exp.LoadData | exp.Command:
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    # Parses DELETE statements, including USING, ON cluster, WHERE, RETURNING
    # and LIMIT clauses.
    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = \
self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            # RETURNING may appear before or after the WHERE clause.
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    # Parses UPDATE <table> SET <assignments> [FROM ...] [WHERE ...]
    # [RETURNING ...] [ORDER BY ...] [LIMIT ...].
    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    # Parses USE [<kind>] <name>, where kind is one of self.USABLES
    # (e.g. DATABASE/SCHEMA, dialect-dependent).
    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    # Parses Spark-style UNCACHE TABLE [IF EXISTS] <table>.
    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    # Parses Spark-style CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>].
    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single 'key' = 'value' pair is consumed here.
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    # Parses a PARTITION/SUBPARTITION (<assignments>) clause.
    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    # Parses a single VALUES row: either a parenthesized tuple or, in some
    # dialects, a bare expression (one-column row).
    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            # DEFAULT as a row value, where the dialect supports it.
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    # Parses the SELECT projection list; thin hook so dialects can override it.
    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    # Parses the contents of a parenthesized query: a PIVOT/UNPIVOT, a
    # FROM-first query (duckdb), or a nested SELECT/table expression.
    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                # Bare FROM with no SELECT -> implicit SELECT *.
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g.
        # join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    # Entry point for parsing a SELECT-like query; optionally consumes a
    # trailing |> pipe-syntax chain and wraps the result in a subquery when
    # it is used in table position.
    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    # Core SELECT parser: handles WITH-prefixed statements, duckdb's leading
    # FROM, the SELECT clause itself, parenthesized/VALUES/SUMMARIZE/DESCRIBE/
    # STREAM forms, and finally set operations.
    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may not raise at lenient error levels, hence the fallback.
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # A following dot means ALL/DISTINCT is actually a table/column
            # qualifier, not a quantifier.
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = \
self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            # BigQuery's SELECT AS STRUCT / SELECT AS VALUE.
            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            # TOP-style limit comes before the projections.
            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # Bare leading FROM (duckdb) -> implicit SELECT *.
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return \
self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM wasn't followed by a function call; give the token back.
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    # Parses the SEARCH [DEPTH|BREADTH] FIRST BY ... SET ... clause of a
    # recursive CTE (SQL standard / Oracle).
    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    # Parses a WITH [RECURSIVE] clause and its comma-separated CTEs.
    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            # CTEs are normally separated by commas; tolerate a repeated WITH.
            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    # Parses a single CTE: alias [AS] ( statement ), with optional
    # [NOT] MATERIALIZED hint.
    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if \
not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        # Without AS (and when the dialect requires it), this wasn't a CTE after all.
        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        # A bare VALUES body is normalized to SELECT * FROM (VALUES ...) so the
        # CTE body is always a query.
        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    # Parses a table alias with optional column list: [AS] name [(col, ...)].
    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            # An empty column list means the paren wasn't an alias column list; undo.
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    # Wraps a parsed query in exp.Subquery, attaching pivots, alias and sample.
    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    # Rewrites comma-joined tables that actually reference earlier sources
    # (e.g. BigQuery's `FROM t, t.arr`) into explicit UNNEST joins.
    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Names of sources seen so far; a later "table" whose first part matches
        # one of these is really a column reference to be unnested.
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    # Attaches trailing query modifiers (joins, laterals, WHERE/GROUP/ORDER/
    # LIMIT etc. via QUERY_MODIFIER_PARSERS) to a modifiable expression.
    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if \
self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # LIMIT may carry an inline OFFSET (and LIMIT BY
                            # expressions); hoist them onto a separate Offset node.
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    # Consumes all remaining tokens and returns them verbatim as a single
    # string-valued hint (used when structured hint parsing fails).
    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    # Hook so dialects can customize how function calls inside hints parse.
    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    # Parses the body of a hint comment as CSV groups of function calls /
    # vars, falling back to a raw string on any parse error or leftover tokens.
    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            # iter(..., []) keeps pulling CSV groups until an empty one.
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    # Parses an optimizer hint delivered via a special comment token.
    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    # Parses SELECT ... INTO [TEMPORARY|UNLOGGED] [TABLE] <table>.
    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    # Parses a FROM clause; `skip_from_token` supports duckdb's leading-FROM
    # form where the keyword was already consumed.
    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    # Parses one MEASURES entry of MATCH_RECOGNIZE: [FINAL|RUNNING] <expr>.
    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    # Parses the MATCH_RECOGNIZE(...) row-pattern clause: PARTITION BY,
    # ORDER BY, MEASURES, rows-per-match, AFTER MATCH SKIP, PATTERN and DEFINE.
    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        # Rows-per-match variants are kept as verbatim keyword strings.
        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH",
"UNMATCHED", "ROWS"): 3667 text += " WITH UNMATCHED ROWS" 3668 rows = exp.var(text) 3669 else: 3670 rows = None 3671 3672 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3673 text = "AFTER MATCH SKIP" 3674 if self._match_text_seq("PAST", "LAST", "ROW"): 3675 text += " PAST LAST ROW" 3676 elif self._match_text_seq("TO", "NEXT", "ROW"): 3677 text += " TO NEXT ROW" 3678 elif self._match_text_seq("TO", "FIRST"): 3679 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3680 elif self._match_text_seq("TO", "LAST"): 3681 text += f" TO LAST {self._advance_any().text}" # type: ignore 3682 after = exp.var(text) 3683 else: 3684 after = None 3685 3686 if self._match_text_seq("PATTERN"): 3687 self._match_l_paren() 3688 3689 if not self._curr: 3690 self.raise_error("Expecting )", self._curr) 3691 3692 paren = 1 3693 start = self._curr 3694 3695 while self._curr and paren > 0: 3696 if self._curr.token_type == TokenType.L_PAREN: 3697 paren += 1 3698 if self._curr.token_type == TokenType.R_PAREN: 3699 paren -= 1 3700 3701 end = self._prev 3702 self._advance() 3703 3704 if paren > 0: 3705 self.raise_error("Expecting )", self._curr) 3706 3707 pattern = exp.var(self._find_sql(start, end)) 3708 else: 3709 pattern = None 3710 3711 define = ( 3712 self._parse_csv(self._parse_name_as_expression) 3713 if self._match_text_seq("DEFINE") 3714 else None 3715 ) 3716 3717 self._match_r_paren() 3718 3719 return self.expression( 3720 exp.MatchRecognize, 3721 partition_by=partition, 3722 order=order, 3723 measures=measures, 3724 rows=rows, 3725 after=after, 3726 pattern=pattern, 3727 define=define, 3728 alias=self._parse_table_alias(), 3729 ) 3730 3731 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3732 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3733 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3734 cross_apply = False 3735 3736 if cross_apply is not None: 3737 this = self._parse_select(table=True) 3738 view = None 3739 outer = 
None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Lateral over an UNNEST, a function call, or a bare identifier
            # (possibly dotted, e.g. a qualified table function).
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            # Hive LATERAL VIEW: table alias followed by AS col1, col2, ...
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    # Matches the optional (method, side, kind) token triple that can precede
    # JOIN, e.g. "LEFT OUTER" or "HASH INNER".
    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    # Parses the USING (col, ...) identifier list of a join, unwrapping any
    # parsed Column nodes down to their identifiers.
    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
    # Parses a single JOIN clause: comma cross-joins, method/side/kind
    # prefixes, APPLY forms, MATCH_CONDITION (Snowflake), and ON/USING
    # conditions (including conditions that bind after nested joins).
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            # Comma join; _try_parse leaves the cursor untouched on failure.
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            # No JOIN keyword: undo the method/side/kind matches.
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            # ARRAY JOIN (ClickHouse) may join several comma-separated arrays.
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not
isinstance(kwargs["this"], exp.Unnest) 3859 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3860 ): 3861 index = self._index 3862 joins: t.Optional[list] = list(self._parse_joins()) 3863 3864 if joins and self._match(TokenType.ON): 3865 kwargs["on"] = self._parse_assignment() 3866 elif joins and self._match(TokenType.USING): 3867 kwargs["using"] = self._parse_using_identifiers() 3868 else: 3869 joins = None 3870 self._retreat(index) 3871 3872 kwargs["this"].set("joins", joins if joins else None) 3873 3874 kwargs["pivots"] = self._parse_pivots() 3875 3876 comments = [c for token in (method, side, kind) if token for c in token.comments] 3877 comments = (join_comments or []) + comments 3878 return self.expression(exp.Join, comments=comments, **kwargs) 3879 3880 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3881 this = self._parse_assignment() 3882 3883 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3884 return this 3885 3886 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3887 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3888 3889 return this 3890 3891 def _parse_index_params(self) -> exp.IndexParameters: 3892 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3893 3894 if self._match(TokenType.L_PAREN, advance=False): 3895 columns = self._parse_wrapped_csv(self._parse_with_operator) 3896 else: 3897 columns = None 3898 3899 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3900 partition_by = self._parse_partition_by() 3901 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3902 tablespace = ( 3903 self._parse_var(any_token=True) 3904 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3905 else None 3906 ) 3907 where = self._parse_where() 3908 3909 on = self._parse_field() if self._match(TokenType.ON) else None 3910 3911 return self.expression( 3912 
exp.IndexParameters, 3913 using=using, 3914 columns=columns, 3915 include=include, 3916 partition_by=partition_by, 3917 where=where, 3918 with_storage=with_storage, 3919 tablespace=tablespace, 3920 on=on, 3921 ) 3922 3923 def _parse_index( 3924 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3925 ) -> t.Optional[exp.Index]: 3926 if index or anonymous: 3927 unique = None 3928 primary = None 3929 amp = None 3930 3931 self._match(TokenType.ON) 3932 self._match(TokenType.TABLE) # hive 3933 table = self._parse_table_parts(schema=True) 3934 else: 3935 unique = self._match(TokenType.UNIQUE) 3936 primary = self._match_text_seq("PRIMARY") 3937 amp = self._match_text_seq("AMP") 3938 3939 if not self._match(TokenType.INDEX): 3940 return None 3941 3942 index = self._parse_id_var() 3943 table = None 3944 3945 params = self._parse_index_params() 3946 3947 return self.expression( 3948 exp.Index, 3949 this=index, 3950 table=table, 3951 unique=unique, 3952 primary=primary, 3953 amp=amp, 3954 params=params, 3955 ) 3956 3957 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3958 hints: t.List[exp.Expression] = [] 3959 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3960 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3961 hints.append( 3962 self.expression( 3963 exp.WithTableHint, 3964 expressions=self._parse_csv( 3965 lambda: self._parse_function() or self._parse_var(any_token=True) 3966 ), 3967 ) 3968 ) 3969 self._match_r_paren() 3970 else: 3971 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3972 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3973 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3974 3975 self._match_set((TokenType.INDEX, TokenType.KEY)) 3976 if self._match(TokenType.FOR): 3977 hint.set("target", self._advance_any() and self._prev.text.upper()) 3978 3979 hint.set("expressions", self._parse_wrapped_id_vars()) 3980 hints.append(hint) 3981 
3982 return hints or None 3983 3984 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3985 return ( 3986 (not schema and self._parse_function(optional_parens=False)) 3987 or self._parse_id_var(any_token=False) 3988 or self._parse_string_as_identifier() 3989 or self._parse_placeholder() 3990 ) 3991 3992 def _parse_table_parts( 3993 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3994 ) -> exp.Table: 3995 catalog = None 3996 db = None 3997 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3998 3999 while self._match(TokenType.DOT): 4000 if catalog: 4001 # This allows nesting the table in arbitrarily many dot expressions if needed 4002 table = self.expression( 4003 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4004 ) 4005 else: 4006 catalog = db 4007 db = table 4008 # "" used for tsql FROM a..b case 4009 table = self._parse_table_part(schema=schema) or "" 4010 4011 if ( 4012 wildcard 4013 and self._is_connected() 4014 and (isinstance(table, exp.Identifier) or not table) 4015 and self._match(TokenType.STAR) 4016 ): 4017 if isinstance(table, exp.Identifier): 4018 table.args["this"] += "*" 4019 else: 4020 table = exp.Identifier(this="*") 4021 4022 # We bubble up comments from the Identifier to the Table 4023 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4024 4025 if is_db_reference: 4026 catalog = db 4027 db = table 4028 table = None 4029 4030 if not table and not is_db_reference: 4031 self.raise_error(f"Expected table name but got {self._curr}") 4032 if not db and is_db_reference: 4033 self.raise_error(f"Expected database name but got {self._curr}") 4034 4035 table = self.expression( 4036 exp.Table, 4037 comments=comments, 4038 this=table, 4039 db=db, 4040 catalog=catalog, 4041 ) 4042 4043 changes = self._parse_changes() 4044 if changes: 4045 table.set("changes", changes) 4046 4047 at_before = 
self._parse_historical_data() 4048 if at_before: 4049 table.set("when", at_before) 4050 4051 pivots = self._parse_pivots() 4052 if pivots: 4053 table.set("pivots", pivots) 4054 4055 return table 4056 4057 def _parse_table( 4058 self, 4059 schema: bool = False, 4060 joins: bool = False, 4061 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4062 parse_bracket: bool = False, 4063 is_db_reference: bool = False, 4064 parse_partition: bool = False, 4065 consume_pipe: bool = False, 4066 ) -> t.Optional[exp.Expression]: 4067 lateral = self._parse_lateral() 4068 if lateral: 4069 return lateral 4070 4071 unnest = self._parse_unnest() 4072 if unnest: 4073 return unnest 4074 4075 values = self._parse_derived_table_values() 4076 if values: 4077 return values 4078 4079 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4080 if subquery: 4081 if not subquery.args.get("pivots"): 4082 subquery.set("pivots", self._parse_pivots()) 4083 return subquery 4084 4085 bracket = parse_bracket and self._parse_bracket(None) 4086 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4087 4088 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4089 self._parse_table 4090 ) 4091 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4092 4093 only = self._match(TokenType.ONLY) 4094 4095 this = t.cast( 4096 exp.Expression, 4097 bracket 4098 or rows_from 4099 or self._parse_bracket( 4100 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4101 ), 4102 ) 4103 4104 if only: 4105 this.set("only", only) 4106 4107 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4108 self._match_text_seq("*") 4109 4110 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4111 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4112 this.set("partition", self._parse_partition()) 4113 4114 if schema: 4115 return 
self._parse_schema(this=this) 4116 4117 version = self._parse_version() 4118 4119 if version: 4120 this.set("version", version) 4121 4122 if self.dialect.ALIAS_POST_TABLESAMPLE: 4123 this.set("sample", self._parse_table_sample()) 4124 4125 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4126 if alias: 4127 this.set("alias", alias) 4128 4129 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4130 return self.expression( 4131 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4132 ) 4133 4134 this.set("hints", self._parse_table_hints()) 4135 4136 if not this.args.get("pivots"): 4137 this.set("pivots", self._parse_pivots()) 4138 4139 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4140 this.set("sample", self._parse_table_sample()) 4141 4142 if joins: 4143 for join in self._parse_joins(): 4144 this.append("joins", join) 4145 4146 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4147 this.set("ordinality", True) 4148 this.set("alias", self._parse_table_alias()) 4149 4150 return this 4151 4152 def _parse_version(self) -> t.Optional[exp.Version]: 4153 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4154 this = "TIMESTAMP" 4155 elif self._match(TokenType.VERSION_SNAPSHOT): 4156 this = "VERSION" 4157 else: 4158 return None 4159 4160 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4161 kind = self._prev.text.upper() 4162 start = self._parse_bitwise() 4163 self._match_texts(("TO", "AND")) 4164 end = self._parse_bitwise() 4165 expression: t.Optional[exp.Expression] = self.expression( 4166 exp.Tuple, expressions=[start, end] 4167 ) 4168 elif self._match_text_seq("CONTAINED", "IN"): 4169 kind = "CONTAINED IN" 4170 expression = self.expression( 4171 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4172 ) 4173 elif self._match(TokenType.ALL): 4174 kind = "ALL" 4175 expression = None 4176 else: 4177 self._match_text_seq("AS", "OF") 4178 kind = "AS OF" 4179 expression = 
self._parse_type() 4180 4181 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4182 4183 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4184 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4185 index = self._index 4186 historical_data = None 4187 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4188 this = self._prev.text.upper() 4189 kind = ( 4190 self._match(TokenType.L_PAREN) 4191 and self._match_texts(self.HISTORICAL_DATA_KIND) 4192 and self._prev.text.upper() 4193 ) 4194 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4195 4196 if expression: 4197 self._match_r_paren() 4198 historical_data = self.expression( 4199 exp.HistoricalData, this=this, kind=kind, expression=expression 4200 ) 4201 else: 4202 self._retreat(index) 4203 4204 return historical_data 4205 4206 def _parse_changes(self) -> t.Optional[exp.Changes]: 4207 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4208 return None 4209 4210 information = self._parse_var(any_token=True) 4211 self._match_r_paren() 4212 4213 return self.expression( 4214 exp.Changes, 4215 information=information, 4216 at_before=self._parse_historical_data(), 4217 end=self._parse_historical_data(), 4218 ) 4219 4220 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4221 if not self._match(TokenType.UNNEST): 4222 return None 4223 4224 expressions = self._parse_wrapped_csv(self._parse_equality) 4225 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4226 4227 alias = self._parse_table_alias() if with_alias else None 4228 4229 if alias: 4230 if self.dialect.UNNEST_COLUMN_ONLY: 4231 if alias.args.get("columns"): 4232 self.raise_error("Unexpected extra column alias in unnest.") 4233 4234 alias.set("columns", [alias.this]) 4235 alias.set("this", None) 4236 4237 columns = alias.args.get("columns") or [] 4238 if offset and len(expressions) < len(columns): 4239 offset = columns.pop() 4240 
4241 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4242 self._match(TokenType.ALIAS) 4243 offset = self._parse_id_var( 4244 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4245 ) or exp.to_identifier("offset") 4246 4247 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4248 4249 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4250 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4251 if not is_derived and not ( 4252 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4253 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4254 ): 4255 return None 4256 4257 expressions = self._parse_csv(self._parse_value) 4258 alias = self._parse_table_alias() 4259 4260 if is_derived: 4261 self._match_r_paren() 4262 4263 return self.expression( 4264 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4265 ) 4266 4267 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4268 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4269 as_modifier and self._match_text_seq("USING", "SAMPLE") 4270 ): 4271 return None 4272 4273 bucket_numerator = None 4274 bucket_denominator = None 4275 bucket_field = None 4276 percent = None 4277 size = None 4278 seed = None 4279 4280 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4281 matched_l_paren = self._match(TokenType.L_PAREN) 4282 4283 if self.TABLESAMPLE_CSV: 4284 num = None 4285 expressions = self._parse_csv(self._parse_primary) 4286 else: 4287 expressions = None 4288 num = ( 4289 self._parse_factor() 4290 if self._match(TokenType.NUMBER, advance=False) 4291 else self._parse_primary() or self._parse_placeholder() 4292 ) 4293 4294 if self._match_text_seq("BUCKET"): 4295 bucket_numerator = self._parse_number() 4296 self._match_text_seq("OUT", "OF") 4297 bucket_denominator = bucket_denominator = self._parse_number() 4298 
self._match(TokenType.ON) 4299 bucket_field = self._parse_field() 4300 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4301 percent = num 4302 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4303 size = num 4304 else: 4305 percent = num 4306 4307 if matched_l_paren: 4308 self._match_r_paren() 4309 4310 if self._match(TokenType.L_PAREN): 4311 method = self._parse_var(upper=True) 4312 seed = self._match(TokenType.COMMA) and self._parse_number() 4313 self._match_r_paren() 4314 elif self._match_texts(("SEED", "REPEATABLE")): 4315 seed = self._parse_wrapped(self._parse_number) 4316 4317 if not method and self.DEFAULT_SAMPLING_METHOD: 4318 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4319 4320 return self.expression( 4321 exp.TableSample, 4322 expressions=expressions, 4323 method=method, 4324 bucket_numerator=bucket_numerator, 4325 bucket_denominator=bucket_denominator, 4326 bucket_field=bucket_field, 4327 percent=percent, 4328 size=size, 4329 seed=seed, 4330 ) 4331 4332 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4333 return list(iter(self._parse_pivot, None)) or None 4334 4335 def _parse_joins(self) -> t.Iterator[exp.Join]: 4336 return iter(self._parse_join, None) 4337 4338 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4339 if not self._match(TokenType.INTO): 4340 return None 4341 4342 return self.expression( 4343 exp.UnpivotColumns, 4344 this=self._match_text_seq("NAME") and self._parse_column(), 4345 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4346 ) 4347 4348 # https://duckdb.org/docs/sql/statements/pivot 4349 def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4350 def _parse_on() -> t.Optional[exp.Expression]: 4351 this = self._parse_bitwise() 4352 4353 if self._match(TokenType.IN): 4354 # PIVOT ... 
ON col IN (row_val1, row_val2) 4355 return self._parse_in(this) 4356 if self._match(TokenType.ALIAS, advance=False): 4357 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4358 return self._parse_alias(this) 4359 4360 return this 4361 4362 this = self._parse_table() 4363 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4364 into = self._parse_unpivot_columns() 4365 using = self._match(TokenType.USING) and self._parse_csv( 4366 lambda: self._parse_alias(self._parse_function()) 4367 ) 4368 group = self._parse_group() 4369 4370 return self.expression( 4371 exp.Pivot, 4372 this=this, 4373 expressions=expressions, 4374 using=using, 4375 group=group, 4376 unpivot=is_unpivot, 4377 into=into, 4378 ) 4379 4380 def _parse_pivot_in(self) -> exp.In: 4381 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4382 this = self._parse_select_or_expression() 4383 4384 self._match(TokenType.ALIAS) 4385 alias = self._parse_bitwise() 4386 if alias: 4387 if isinstance(alias, exp.Column) and not alias.db: 4388 alias = alias.this 4389 return self.expression(exp.PivotAlias, this=this, alias=alias) 4390 4391 return this 4392 4393 value = self._parse_column() 4394 4395 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4396 self.raise_error("Expecting IN (") 4397 4398 if self._match(TokenType.ANY): 4399 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4400 else: 4401 exprs = self._parse_csv(_parse_aliased_expression) 4402 4403 self._match_r_paren() 4404 return self.expression(exp.In, this=value, expressions=exprs) 4405 4406 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4407 func = self._parse_function() 4408 if not func: 4409 self.raise_error("Expecting an aggregation function in PIVOT") 4410 4411 return self._parse_alias(func) 4412 4413 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4414 index = self._index 4415 include_nulls = None 4416 4417 if self._match(TokenType.PIVOT): 4418 unpivot = False 
4419 elif self._match(TokenType.UNPIVOT): 4420 unpivot = True 4421 4422 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4423 if self._match_text_seq("INCLUDE", "NULLS"): 4424 include_nulls = True 4425 elif self._match_text_seq("EXCLUDE", "NULLS"): 4426 include_nulls = False 4427 else: 4428 return None 4429 4430 expressions = [] 4431 4432 if not self._match(TokenType.L_PAREN): 4433 self._retreat(index) 4434 return None 4435 4436 if unpivot: 4437 expressions = self._parse_csv(self._parse_column) 4438 else: 4439 expressions = self._parse_csv(self._parse_pivot_aggregation) 4440 4441 if not expressions: 4442 self.raise_error("Failed to parse PIVOT's aggregation list") 4443 4444 if not self._match(TokenType.FOR): 4445 self.raise_error("Expecting FOR") 4446 4447 fields = [] 4448 while True: 4449 field = self._try_parse(self._parse_pivot_in) 4450 if not field: 4451 break 4452 fields.append(field) 4453 4454 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4455 self._parse_bitwise 4456 ) 4457 4458 group = self._parse_group() 4459 4460 self._match_r_paren() 4461 4462 pivot = self.expression( 4463 exp.Pivot, 4464 expressions=expressions, 4465 fields=fields, 4466 unpivot=unpivot, 4467 include_nulls=include_nulls, 4468 default_on_null=default_on_null, 4469 group=group, 4470 ) 4471 4472 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4473 pivot.set("alias", self._parse_table_alias()) 4474 4475 if not unpivot: 4476 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4477 4478 columns: t.List[exp.Expression] = [] 4479 all_fields = [] 4480 for pivot_field in pivot.fields: 4481 pivot_field_expressions = pivot_field.expressions 4482 4483 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the aliases of the pivot aggregations (unaliased ones are skipped)."""
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse ClickHouse's PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse a GROUP BY clause, including ALL/DISTINCT, ROLLUP, CUBE,
        GROUPING SETS and ClickHouse's WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            # Bare GROUP BY (e.g. `GROUP BY ALL` followed directly by a modifier)
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                # Only WITH (or nothing) was consumed — rewind and stop
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Build a Cube/Rollup node; `with_prefix` means `WITH CUBE/ROLLUP` (no args)."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one element of a GROUPING SETS list (a column or a tuple)."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        """Parse a CONNECT BY condition, temporarily enabling the PRIOR operator."""
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order)."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<name> [AS <expr>]`, as used e.g. in INTERPOLATE."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse ClickHouse's INTERPOLATE (...) clause of WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or Oracle's ORDER SIBLINGS BY)."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Parse a SORT BY / CLUSTER BY / DISTRIBUTE BY style clause."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ORDER BY term with ASC/DESC, NULLS FIRST/LAST and WITH FILL."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        # `(asc and False)` normalizes desc to False (not None) when ASC was explicit
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4717 ) 4718 4719 def _parse_limit_options(self) -> exp.LimitOptions: 4720 percent = self._match(TokenType.PERCENT) 4721 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4722 self._match_text_seq("ONLY") 4723 with_ties = self._match_text_seq("WITH", "TIES") 4724 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4725 4726 def _parse_limit( 4727 self, 4728 this: t.Optional[exp.Expression] = None, 4729 top: bool = False, 4730 skip_limit_token: bool = False, 4731 ) -> t.Optional[exp.Expression]: 4732 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4733 comments = self._prev_comments 4734 if top: 4735 limit_paren = self._match(TokenType.L_PAREN) 4736 expression = self._parse_term() if limit_paren else self._parse_number() 4737 4738 if limit_paren: 4739 self._match_r_paren() 4740 4741 limit_options = self._parse_limit_options() 4742 else: 4743 limit_options = None 4744 expression = self._parse_term() 4745 4746 if self._match(TokenType.COMMA): 4747 offset = expression 4748 expression = self._parse_term() 4749 else: 4750 offset = None 4751 4752 limit_exp = self.expression( 4753 exp.Limit, 4754 this=this, 4755 expression=expression, 4756 offset=offset, 4757 comments=comments, 4758 limit_options=limit_options, 4759 expressions=self._parse_limit_by(), 4760 ) 4761 4762 return limit_exp 4763 4764 if self._match(TokenType.FETCH): 4765 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4766 direction = self._prev.text.upper() if direction else "FIRST" 4767 4768 count = self._parse_field(tokens=self.FETCH_TOKENS) 4769 4770 return self.expression( 4771 exp.Fetch, 4772 direction=direction, 4773 count=count, 4774 limit_options=self._parse_limit_options(), 4775 ) 4776 4777 return this 4778 4779 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4780 if not 
self._match(TokenType.OFFSET): 4781 return this 4782 4783 count = self._parse_term() 4784 self._match_set((TokenType.ROW, TokenType.ROWS)) 4785 4786 return self.expression( 4787 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4788 ) 4789 4790 def _can_parse_limit_or_offset(self) -> bool: 4791 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4792 return False 4793 4794 index = self._index 4795 result = bool( 4796 self._try_parse(self._parse_limit, retreat=True) 4797 or self._try_parse(self._parse_offset, retreat=True) 4798 ) 4799 self._retreat(index) 4800 return result 4801 4802 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4803 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4804 4805 def _parse_locks(self) -> t.List[exp.Lock]: 4806 locks = [] 4807 while True: 4808 update, key = None, None 4809 if self._match_text_seq("FOR", "UPDATE"): 4810 update = True 4811 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4812 "LOCK", "IN", "SHARE", "MODE" 4813 ): 4814 update = False 4815 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4816 update, key = False, True 4817 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4818 update, key = True, True 4819 else: 4820 break 4821 4822 expressions = None 4823 if self._match_text_seq("OF"): 4824 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4825 4826 wait: t.Optional[bool | exp.Expression] = None 4827 if self._match_text_seq("NOWAIT"): 4828 wait = True 4829 elif self._match_text_seq("WAIT"): 4830 wait = self._parse_primary() 4831 elif self._match_text_seq("SKIP", "LOCKED"): 4832 wait = False 4833 4834 locks.append( 4835 self.expression( 4836 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4837 ) 4838 ) 4839 4840 return locks 4841 4842 def parse_set_operation( 4843 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4844 ) -> t.Optional[exp.Expression]: 4845 
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            # Not a set operation after all — undo the join-part lookahead
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            # Neither keyword given — fall back to the dialect's default semantics
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Fold a chain of UNION/EXCEPT/INTERSECT operations into a left-deep tree."""
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Hoist trailing modifiers (e.g. ORDER BY, LIMIT) from the right-hand
                # select onto the set operation itself
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    # The following helpers form the operator-precedence ladder, each delegating
    # to the next-tighter level via _parse_tokens
    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS [NOT] NULL, ...)."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the right-hand side of IS: [NOT] DISTINCT FROM, JSON predicates, or a literal/NULL."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                # Not a recognizable IS operand — rewind past IS (and NOT)
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the right-hand side of IN: UNNEST(...), a parenthesized/bracketed list or
        subquery, or a bare field reference."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        # symmetric stays None when neither keyword is present
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing it to INTERVAL '<value>' <unit> form."""
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            # Bare "INTERVAL" followed by IS was a column named interval, not an interval literal
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            # e.g. INTERVAL '1-2' YEAR TO MONTH
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise operators, string concat (||), ?? coalescing and shift operators."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM), with a special case for COLLATE operands."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Collate):
            expr = this.expression

            # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
            # fallback to Identifier / Var
            if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                ident = expr.this
                if isinstance(ident, exp.Identifier):
                    this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators, marking Div nodes with dialect division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                # A word-form operator (e.g. DIV) with no operand was actually an identifier
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

        if isinstance(this, exp.Div):
            this.args["typed"] = self.dialect.TYPED_DIVISION
            this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a type expression, a typed literal (e.g. DATE '2020-01-01'), or fall back to a column."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
5222 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5223 if isinstance(data_type, exp.Cast): 5224 # This constructor can contain ops directly after it, for instance struct unnesting: 5225 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 5226 return self._parse_column_ops(data_type) 5227 5228 if data_type: 5229 index2 = self._index 5230 this = self._parse_primary() 5231 5232 if isinstance(this, exp.Literal): 5233 literal = this.name 5234 this = self._parse_column_ops(this) 5235 5236 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5237 if parser: 5238 return parser(self, this, data_type) 5239 5240 if ( 5241 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5242 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5243 and TIME_ZONE_RE.search(literal) 5244 ): 5245 data_type = exp.DataType.build("TIMESTAMPTZ") 5246 5247 return self.expression(exp.Cast, this=this, to=data_type) 5248 5249 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5250 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5251 # 5252 # If the index difference here is greater than 1, that means the parser itself must have 5253 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5254 # 5255 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5256 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5257 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5258 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 5259 # 5260 # In these cases, we don't really want to return the converted type, but instead retreat 5261 # and try to parse a Column or Identifier in the section below. 
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse a single data type parameter (e.g. the 38 in DECIMAL(38, 0))."""
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a UDT DataType from a possibly dot-qualified type name."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a (possibly nested/parameterized) data type, or None if no type is present."""
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    # The identifier re-tokenizes as a single type keyword (e.g. quoted "INT")
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    # Nullable(T) unwraps to T with nullable=True
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

            # https://docs.snowflake.com/en/sql-reference/data-types-vector
            if type_token == TokenType.VECTOR and len(expressions) == 2:
                expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            # The type keyword might actually be a function call (e.g. DATE('...')) — peek
            # for a string argument and bail out of type parsing if none follows
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one field of a STRUCT type, i.e. `name [:] type [constraints]`."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            # Oracle-style (+) outer join marker
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks `col:path` VARIANT extraction into exp.JSONExtract."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

            while casts:
                # Re-apply the casts that were peeled off the path, outermost last
                this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse postfix column operators: brackets, dots, casts and dialect-specific operators."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                # e.g. `expr::type` — the operand is a data type
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift the qualifiers: table.db.column -> catalog.db.table + new column part
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        """Parse a parenthesized expression, tuple, or subquery."""
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            # `()` — an empty tuple
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals (with implicit string concatenation),
        leading-dot numbers, or a parenthesized expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate, e.g. 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # `.5` -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/variable.

        The anonymous_func flag flips the primary/function attempt order so that
        function-looking tokens are preferred.
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func

    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a function invocation, dispatching to registered parsers or builders."""
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening paren
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] =
this 5897 5898 this = func 5899 else: 5900 if token_type == TokenType.IDENTIFIER: 5901 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5902 5903 this = self.expression(exp.Anonymous, this=this, expressions=args) 5904 this = this.update_positions(token) 5905 5906 if isinstance(this, exp.Expression): 5907 this.add_comments(comments) 5908 5909 self._match_r_paren(this) 5910 return self._parse_window(this) 5911 5912 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5913 return expression 5914 5915 def _kv_to_prop_eq( 5916 self, expressions: t.List[exp.Expression], parse_map: bool = False 5917 ) -> t.List[exp.Expression]: 5918 transformed = [] 5919 5920 for index, e in enumerate(expressions): 5921 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5922 if isinstance(e, exp.Alias): 5923 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5924 5925 if not isinstance(e, exp.PropertyEQ): 5926 e = self.expression( 5927 exp.PropertyEQ, 5928 this=e.this if parse_map else exp.to_identifier(e.this.name), 5929 expression=e.expression, 5930 ) 5931 5932 if isinstance(e.this, exp.Column): 5933 e.this.replace(e.this.this) 5934 else: 5935 e = self._to_prop_eq(e, index) 5936 5937 transformed.append(e) 5938 5939 return transformed 5940 5941 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5942 return self._parse_statement() 5943 5944 def _parse_function_parameter(self) -> t.Optional[exp.Expression]: 5945 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5946 5947 def _parse_user_defined_function( 5948 self, kind: t.Optional[TokenType] = None 5949 ) -> t.Optional[exp.Expression]: 5950 this = self._parse_table_parts(schema=True) 5951 5952 if not self._match(TokenType.L_PAREN): 5953 return this 5954 5955 expressions = self._parse_csv(self._parse_function_parameter) 5956 self._match_r_paren() 5957 return self.expression( 5958 exp.UserDefinedFunction, 
this=this, expressions=expressions, wrapped=True 5959 ) 5960 5961 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5962 literal = self._parse_primary() 5963 if literal: 5964 return self.expression(exp.Introducer, this=token.text, expression=literal) 5965 5966 return self._identifier_expression(token) 5967 5968 def _parse_session_parameter(self) -> exp.SessionParameter: 5969 kind = None 5970 this = self._parse_id_var() or self._parse_primary() 5971 5972 if this and self._match(TokenType.DOT): 5973 kind = this.name 5974 this = self._parse_var() or self._parse_primary() 5975 5976 return self.expression(exp.SessionParameter, this=this, kind=kind) 5977 5978 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5979 return self._parse_id_var() 5980 5981 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5982 index = self._index 5983 5984 if self._match(TokenType.L_PAREN): 5985 expressions = t.cast( 5986 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5987 ) 5988 5989 if not self._match(TokenType.R_PAREN): 5990 self._retreat(index) 5991 else: 5992 expressions = [self._parse_lambda_arg()] 5993 5994 if self._match_set(self.LAMBDAS): 5995 return self.LAMBDAS[self._prev.token_type](self, expressions) 5996 5997 self._retreat(index) 5998 5999 this: t.Optional[exp.Expression] 6000 6001 if self._match(TokenType.DISTINCT): 6002 this = self.expression( 6003 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6004 ) 6005 else: 6006 this = self._parse_select_or_expression(alias=alias) 6007 6008 return self._parse_limit( 6009 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6010 ) 6011 6012 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6013 index = self._index 6014 if not self._match(TokenType.L_PAREN): 6015 return this 6016 6017 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6018 # expr can be of both types 6019 if self._match_set(self.SELECT_START_TOKENS): 6020 self._retreat(index) 6021 return this 6022 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6023 self._match_r_paren() 6024 return self.expression(exp.Schema, this=this, expressions=args) 6025 6026 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6027 return self._parse_column_def(self._parse_field(any_token=True)) 6028 6029 def _parse_column_def( 6030 self, this: t.Optional[exp.Expression], computed_column: bool = True 6031 ) -> t.Optional[exp.Expression]: 6032 # column defs are not really columns, they're identifiers 6033 if isinstance(this, exp.Column): 6034 this = this.this 6035 6036 if not computed_column: 6037 self._match(TokenType.ALIAS) 6038 6039 kind = self._parse_types(schema=True) 6040 6041 if self._match_text_seq("FOR", "ORDINALITY"): 6042 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6043 6044 constraints: t.List[exp.Expression] = [] 6045 6046 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6047 ("ALIAS", "MATERIALIZED") 6048 ): 6049 persisted = self._prev.text.upper() == "MATERIALIZED" 6050 constraint_kind = exp.ComputedColumnConstraint( 6051 this=self._parse_assignment(), 6052 persisted=persisted or self._match_text_seq("PERSISTED"), 6053 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6054 ) 6055 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6056 elif ( 6057 kind 6058 and self._match(TokenType.ALIAS, advance=False) 6059 and ( 6060 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6061 or (self._next and self._next.token_type == TokenType.L_PAREN) 6062 ) 6063 ): 6064 self._advance() 6065 constraints.append( 6066 self.expression( 6067 exp.ColumnConstraint, 6068 kind=exp.ComputedColumnConstraint( 6069 this=self._parse_disjunction(), 6070 persisted=self._match_texts(("STORED", "VIRTUAL")) 6071 and 
self._prev.text.upper() == "STORED", 6072 ), 6073 ) 6074 ) 6075 6076 while True: 6077 constraint = self._parse_column_constraint() 6078 if not constraint: 6079 break 6080 constraints.append(constraint) 6081 6082 if not kind and not constraints: 6083 return this 6084 6085 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6086 6087 def _parse_auto_increment( 6088 self, 6089 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6090 start = None 6091 increment = None 6092 order = None 6093 6094 if self._match(TokenType.L_PAREN, advance=False): 6095 args = self._parse_wrapped_csv(self._parse_bitwise) 6096 start = seq_get(args, 0) 6097 increment = seq_get(args, 1) 6098 elif self._match_text_seq("START"): 6099 start = self._parse_bitwise() 6100 self._match_text_seq("INCREMENT") 6101 increment = self._parse_bitwise() 6102 if self._match_text_seq("ORDER"): 6103 order = True 6104 elif self._match_text_seq("NOORDER"): 6105 order = False 6106 6107 if start and increment: 6108 return exp.GeneratedAsIdentityColumnConstraint( 6109 start=start, increment=increment, this=False, order=order 6110 ) 6111 6112 return exp.AutoIncrementColumnConstraint() 6113 6114 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6115 if not self._match_text_seq("REFRESH"): 6116 self._retreat(self._index - 1) 6117 return None 6118 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6119 6120 def _parse_compress(self) -> exp.CompressColumnConstraint: 6121 if self._match(TokenType.L_PAREN, advance=False): 6122 return self.expression( 6123 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6124 ) 6125 6126 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6127 6128 def _parse_generated_as_identity( 6129 self, 6130 ) -> ( 6131 exp.GeneratedAsIdentityColumnConstraint 6132 | exp.ComputedColumnConstraint 6133 | 
exp.GeneratedAsRowColumnConstraint 6134 ): 6135 if self._match_text_seq("BY", "DEFAULT"): 6136 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6137 this = self.expression( 6138 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6139 ) 6140 else: 6141 self._match_text_seq("ALWAYS") 6142 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6143 6144 self._match(TokenType.ALIAS) 6145 6146 if self._match_text_seq("ROW"): 6147 start = self._match_text_seq("START") 6148 if not start: 6149 self._match(TokenType.END) 6150 hidden = self._match_text_seq("HIDDEN") 6151 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6152 6153 identity = self._match_text_seq("IDENTITY") 6154 6155 if self._match(TokenType.L_PAREN): 6156 if self._match(TokenType.START_WITH): 6157 this.set("start", self._parse_bitwise()) 6158 if self._match_text_seq("INCREMENT", "BY"): 6159 this.set("increment", self._parse_bitwise()) 6160 if self._match_text_seq("MINVALUE"): 6161 this.set("minvalue", self._parse_bitwise()) 6162 if self._match_text_seq("MAXVALUE"): 6163 this.set("maxvalue", self._parse_bitwise()) 6164 6165 if self._match_text_seq("CYCLE"): 6166 this.set("cycle", True) 6167 elif self._match_text_seq("NO", "CYCLE"): 6168 this.set("cycle", False) 6169 6170 if not identity: 6171 this.set("expression", self._parse_range()) 6172 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6173 args = self._parse_csv(self._parse_bitwise) 6174 this.set("start", seq_get(args, 0)) 6175 this.set("increment", seq_get(args, 1)) 6176 6177 self._match_r_paren() 6178 6179 return this 6180 6181 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6182 self._match_text_seq("LENGTH") 6183 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6184 6185 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6186 if self._match_text_seq("NULL"): 6187 return 
self.expression(exp.NotNullColumnConstraint) 6188 if self._match_text_seq("CASESPECIFIC"): 6189 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6190 if self._match_text_seq("FOR", "REPLICATION"): 6191 return self.expression(exp.NotForReplicationColumnConstraint) 6192 6193 # Unconsume the `NOT` token 6194 self._retreat(self._index - 1) 6195 return None 6196 6197 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6198 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6199 6200 procedure_option_follows = ( 6201 self._match(TokenType.WITH, advance=False) 6202 and self._next 6203 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6204 ) 6205 6206 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6207 return self.expression( 6208 exp.ColumnConstraint, 6209 this=this, 6210 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6211 ) 6212 6213 return this 6214 6215 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6216 if not self._match(TokenType.CONSTRAINT): 6217 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6218 6219 return self.expression( 6220 exp.Constraint, 6221 this=self._parse_id_var(), 6222 expressions=self._parse_unnamed_constraints(), 6223 ) 6224 6225 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6226 constraints = [] 6227 while True: 6228 constraint = self._parse_unnamed_constraint() or self._parse_function() 6229 if not constraint: 6230 break 6231 constraints.append(constraint) 6232 6233 return constraints 6234 6235 def _parse_unnamed_constraint( 6236 self, constraints: t.Optional[t.Collection[str]] = None 6237 ) -> t.Optional[exp.Expression]: 6238 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6239 constraints or self.CONSTRAINT_PARSERS 6240 ): 6241 return None 6242 6243 constraint = self._prev.text.upper() 6244 if constraint not in self.CONSTRAINT_PARSERS: 6245 
self.raise_error(f"No parser found for schema constraint {constraint}.") 6246 6247 return self.CONSTRAINT_PARSERS[constraint](self) 6248 6249 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6250 return self._parse_id_var(any_token=False) 6251 6252 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6253 self._match_texts(("KEY", "INDEX")) 6254 return self.expression( 6255 exp.UniqueColumnConstraint, 6256 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6257 this=self._parse_schema(self._parse_unique_key()), 6258 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6259 on_conflict=self._parse_on_conflict(), 6260 options=self._parse_key_constraint_options(), 6261 ) 6262 6263 def _parse_key_constraint_options(self) -> t.List[str]: 6264 options = [] 6265 while True: 6266 if not self._curr: 6267 break 6268 6269 if self._match(TokenType.ON): 6270 action = None 6271 on = self._advance_any() and self._prev.text 6272 6273 if self._match_text_seq("NO", "ACTION"): 6274 action = "NO ACTION" 6275 elif self._match_text_seq("CASCADE"): 6276 action = "CASCADE" 6277 elif self._match_text_seq("RESTRICT"): 6278 action = "RESTRICT" 6279 elif self._match_pair(TokenType.SET, TokenType.NULL): 6280 action = "SET NULL" 6281 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6282 action = "SET DEFAULT" 6283 else: 6284 self.raise_error("Invalid key constraint") 6285 6286 options.append(f"ON {on} {action}") 6287 else: 6288 var = self._parse_var_from_options( 6289 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6290 ) 6291 if not var: 6292 break 6293 options.append(var.name) 6294 6295 return options 6296 6297 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6298 if match and not self._match(TokenType.REFERENCES): 6299 return None 6300 6301 expressions = None 6302 this = self._parse_table(schema=True) 6303 options = self._parse_key_constraint_options() 6304 return self.expression(exp.Reference, 
this=this, expressions=expressions, options=options) 6305 6306 def _parse_foreign_key(self) -> exp.ForeignKey: 6307 expressions = ( 6308 self._parse_wrapped_id_vars() 6309 if not self._match(TokenType.REFERENCES, advance=False) 6310 else None 6311 ) 6312 reference = self._parse_references() 6313 on_options = {} 6314 6315 while self._match(TokenType.ON): 6316 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6317 self.raise_error("Expected DELETE or UPDATE") 6318 6319 kind = self._prev.text.lower() 6320 6321 if self._match_text_seq("NO", "ACTION"): 6322 action = "NO ACTION" 6323 elif self._match(TokenType.SET): 6324 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6325 action = "SET " + self._prev.text.upper() 6326 else: 6327 self._advance() 6328 action = self._prev.text.upper() 6329 6330 on_options[kind] = action 6331 6332 return self.expression( 6333 exp.ForeignKey, 6334 expressions=expressions, 6335 reference=reference, 6336 options=self._parse_key_constraint_options(), 6337 **on_options, # type: ignore 6338 ) 6339 6340 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6341 return self._parse_ordered() or self._parse_field() 6342 6343 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6344 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6345 self._retreat(self._index - 1) 6346 return None 6347 6348 id_vars = self._parse_wrapped_id_vars() 6349 return self.expression( 6350 exp.PeriodForSystemTimeConstraint, 6351 this=seq_get(id_vars, 0), 6352 expression=seq_get(id_vars, 1), 6353 ) 6354 6355 def _parse_primary_key( 6356 self, wrapped_optional: bool = False, in_props: bool = False 6357 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6358 desc = ( 6359 self._match_set((TokenType.ASC, TokenType.DESC)) 6360 and self._prev.token_type == TokenType.DESC 6361 ) 6362 6363 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6364 return self.expression( 6365 
exp.PrimaryKeyColumnConstraint, 6366 desc=desc, 6367 options=self._parse_key_constraint_options(), 6368 ) 6369 6370 expressions = self._parse_wrapped_csv( 6371 self._parse_primary_key_part, optional=wrapped_optional 6372 ) 6373 6374 return self.expression( 6375 exp.PrimaryKey, 6376 expressions=expressions, 6377 include=self._parse_index_params(), 6378 options=self._parse_key_constraint_options(), 6379 ) 6380 6381 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6382 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6383 6384 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6385 """ 6386 Parses a datetime column in ODBC format. We parse the column into the corresponding 6387 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6388 same as we did for `DATE('yyyy-mm-dd')`. 6389 6390 Reference: 6391 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6392 """ 6393 self._match(TokenType.VAR) 6394 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6395 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6396 if not self._match(TokenType.R_BRACE): 6397 self.raise_error("Expected }") 6398 return expression 6399 6400 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6401 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6402 return this 6403 6404 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6405 map_token = seq_get(self._tokens, self._index - 2) 6406 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6407 else: 6408 parse_map = False 6409 6410 bracket_kind = self._prev.token_type 6411 if ( 6412 bracket_kind == TokenType.L_BRACE 6413 and self._curr 6414 and self._curr.token_type == TokenType.VAR 6415 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6416 ): 6417 return 
self._parse_odbc_datetime_literal() 6418 6419 expressions = self._parse_csv( 6420 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6421 ) 6422 6423 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6424 self.raise_error("Expected ]") 6425 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6426 self.raise_error("Expected }") 6427 6428 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6429 if bracket_kind == TokenType.L_BRACE: 6430 this = self.expression( 6431 exp.Struct, 6432 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6433 ) 6434 elif not this: 6435 this = build_array_constructor( 6436 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6437 ) 6438 else: 6439 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6440 if constructor_type: 6441 return build_array_constructor( 6442 constructor_type, 6443 args=expressions, 6444 bracket_kind=bracket_kind, 6445 dialect=self.dialect, 6446 ) 6447 6448 expressions = apply_index_offset( 6449 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6450 ) 6451 this = self.expression( 6452 exp.Bracket, 6453 this=this, 6454 expressions=expressions, 6455 comments=this.pop_comments(), 6456 ) 6457 6458 self._add_comments(this) 6459 return self._parse_bracket(this) 6460 6461 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6462 if self._match(TokenType.COLON): 6463 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6464 return this 6465 6466 def _parse_case(self) -> t.Optional[exp.Expression]: 6467 if self._match(TokenType.DOT, advance=False): 6468 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 6469 self._retreat(self._index - 1) 6470 return None 6471 6472 ifs = [] 6473 default = None 6474 6475 comments = self._prev_comments 6476 
        # Optional CASE operand; None for the searched-CASE form
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # If the ELSE branch was parsed as an Interval whose SQL is "END", the END keyword
            # was swallowed into it - recover by treating it as a column named "interval"
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF(...) function form or IF ... THEN ... [ELSE ...] END statement form."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                # A leading bare IF is treated as a command in these dialects
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Give the NEXT token back to the caller
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<unit> FROM <expr>), also accepting a comma separator."""
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse the body of CAST(<expr> AS <type> [FORMAT ...] [DEFAULT ... ON CONVERSION ERROR])."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # CAST(x, 'type') variant - cast to a string-named type
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                # Temporal cast with a format becomes STR_TO_DATE / STR_TO_TIME
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # Unknown type name: treat it as a user-defined type
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG argument lists into a canonical exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] <xquery> [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse a comma-separated XMLNAMESPACES list of [DEFAULT] uri [AS alias] entries."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        """With fewer than 3 args this is charset DECODE; otherwise a CASE-like DECODE."""
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse [KEY] <key> {:|VALUE} <value> pairs used by JSON_OBJECT and friends."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        """Parse the EMPTY/ERROR/NULL ON-condition clauses of JSON functions."""
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...
    def _parse_json_object(self, agg=False):
        """Parse the arguments of JSON_OBJECT (or JSON_OBJECT_AGG when `agg` is True)."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        # NULL ON NULL / ABSENT ON NULL
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]; None means the clause was absent
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        # Only NESTED columns carry their own COLUMNS(...) schema
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS(<column defs>) schema clause of JSON_TABLE."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<expr> [FORMAT JSON] [, <path>] [ON ERROR/EMPTY] COLUMNS(...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        """Parse MATCH (...) AGAINST ('<query>' [<search modifier>])."""
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )
    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            # One entry of the WITH (<name> <type> [<path>] [AS JSON], ...) clause
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        """Parse POSITION(<substr> IN <str>) or the comma-separated variant.

        `haystack_first` controls the argument order of the comma form.
        """
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        """Parse the table list of a join hint function, e.g. BROADCAST(t1, t2)."""
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                # FOR without FROM: the start position defaults to 1
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        # Optional LEADING | TRAILING | BOTH qualifier
        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            # With FROM, the expression parsed first is the trim pattern, not the target
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a WINDOW clause as a list of named window definitions."""
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        """Parse a single `<name> AS (<window spec>)` definition."""
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Wrap `this` in IgnoreNulls/RespectNulls if those keywords follow."""
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse a trailing `HAVING MAX|MIN <column>` qualifier."""
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            # max is True unless MIN was the matched keyword
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-related suffixes of a function: WITHIN GROUP, FILTER (...),
        IGNORE/RESPECT NULLS and OVER (...).

        When `alias` is True, parse a named WINDOW definition (`<name> AS (<spec>)`)
        instead of an OVER clause.
        """
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist a nested IGNORE/RESPECT NULLS so it wraps the whole aggregate
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window name>: a reference to a named window
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        """Parse the PARTITION BY list and ORDER BY clause of a window spec."""
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        """Parse one frame bound, e.g. UNBOUNDED PRECEDING or <expr> FOLLOWING."""
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_type()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse an optional alias (or parenthesized alias list) after `this`.

        When `explicit` is True, an alias is only accepted if the AS keyword
        is present.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        """Parse an identifier, or accept the next token as one.

        `tokens` overrides the default token types accepted as identifiers.
        """
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        """Parse a string literal, falling back to a placeholder."""
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        """Parse a string literal and convert it into a quoted identifier."""
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        """Parse a numeric literal, falling back to a placeholder."""
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        """Parse a quoted identifier token, falling back to a placeholder."""
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse the next matching token as a Var; `upper` uppercases its text."""
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()
bool = False) -> t.Optional[Token]: 7236 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7237 self._advance() 7238 return self._prev 7239 return None 7240 7241 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7242 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7243 7244 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7245 return self._parse_primary() or self._parse_var(any_token=True) 7246 7247 def _parse_null(self) -> t.Optional[exp.Expression]: 7248 if self._match_set(self.NULL_TOKENS): 7249 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7250 return self._parse_placeholder() 7251 7252 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7253 if self._match(TokenType.TRUE): 7254 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7255 if self._match(TokenType.FALSE): 7256 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7257 return self._parse_placeholder() 7258 7259 def _parse_star(self) -> t.Optional[exp.Expression]: 7260 if self._match(TokenType.STAR): 7261 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7262 return self._parse_placeholder() 7263 7264 def _parse_parameter(self) -> exp.Parameter: 7265 this = self._parse_identifier() or self._parse_primary_or_var() 7266 return self.expression(exp.Parameter, this=this) 7267 7268 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7269 if self._match_set(self.PLACEHOLDER_PARSERS): 7270 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7271 if placeholder: 7272 return placeholder 7273 self._advance(-1) 7274 return None 7275 7276 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7277 if not self._match_texts(keywords): 7278 return None 7279 if self._match(TokenType.L_PAREN, advance=False): 7280 return self._parse_wrapped_csv(self._parse_expression) 7281 7282 expression = 
    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        """Parse `<keyword> (<exprs>)` or `<keyword> <expr>` (e.g. EXCEPT / REPLACE after *)."""
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        """Parse a `sep`-separated list of items produced by `parse_method`."""
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            # Attach comments trailing the separator to the preceding item
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        """Left-fold a chain of binary operators given by the `expressions` mapping."""
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        """Parse a parenthesized, comma-separated list of identifiers."""
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        """Parse a parenthesized `sep`-separated list; parens optional when `optional`."""
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        """Run `parse_method` inside parentheses; require them unless `optional`."""
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        """Parse a comma-separated list of expressions."""
        return self._parse_csv(self._parse_expression)
    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a scalar expression (optionally aliased) or a full SELECT."""
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        """Parse the SELECT part of a DDL statement (e.g. CREATE TABLE ... AS)."""
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        """Parse BEGIN/START [<kind>] [TRANSACTION | WORK] [<modes>]."""
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        # Transaction modes are free-form keyword runs, e.g. ISOLATION LEVEL ...
        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        """Parse COMMIT/ROLLBACK [TRANSACTION | WORK] [TO [SAVEPOINT] <name>] [AND [NO] CHAIN]."""
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        """Parse REFRESH [TABLE] <string or table name>."""
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())
    def _parse_column_def_with_exists(self) -> t.Optional[exp.ColumnDef]:
        """Parse `[COLUMN] [IF NOT EXISTS] <column def>`, rewinding on failure."""
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        """Parse the column definition of an ALTER TABLE ... ADD [COLUMN] action."""
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        """Parse a DROP [COLUMN] action inside ALTER TABLE."""
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            # Default the drop kind to COLUMN when the statement didn't specify one
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )
    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... ADD (constraints, columns or partitions)."""

        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            # One ADD action: a constraint list, a column def, or a partition
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            # ADD (col1 type, ...) schema form, or a bare list of column defs
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse an ALTER TABLE ... ALTER [COLUMN] action."""
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        """Parse an ALTER DISTSTYLE action (ALL | EVEN | AUTO | KEY DISTKEY <col>)."""
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        """Parse an ALTER [COMPOUND] SORTKEY action ((cols) | AUTO | NONE)."""
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        """Parse the actions of ALTER TABLE ... DROP (partitions or columns)."""
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        # Not a partition drop: rewind and parse the column-drop form
        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)
    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        """Parse ALTER TABLE ... RENAME [COLUMN <old> TO <new>] or RENAME TO <table>."""
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the many dialect-specific forms of ALTER TABLE ... SET ..."""
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Hive-style fallback: SET [SERDE <name>] [(<serde properties>)]
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement, falling back to a generic Command on failure."""
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        # ALTER SESSION has no target table, so skip table-related parsing
        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an Alter node when every token was consumed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)
"BUFFER_USAGE_LIMIT": 7669 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 7670 else: 7671 options.append(self._prev.text.upper()) 7672 7673 this: t.Optional[exp.Expression] = None 7674 inner_expression: t.Optional[exp.Expression] = None 7675 7676 kind = self._curr and self._curr.text.upper() 7677 7678 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 7679 this = self._parse_table_parts() 7680 elif self._match_text_seq("TABLES"): 7681 if self._match_set((TokenType.FROM, TokenType.IN)): 7682 kind = f"{kind} {self._prev.text.upper()}" 7683 this = self._parse_table(schema=True, is_db_reference=True) 7684 elif self._match_text_seq("DATABASE"): 7685 this = self._parse_table(schema=True, is_db_reference=True) 7686 elif self._match_text_seq("CLUSTER"): 7687 this = self._parse_table() 7688 # Try matching inner expr keywords before fallback to parse table. 7689 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7690 kind = None 7691 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7692 else: 7693 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 7694 kind = None 7695 this = self._parse_table_parts() 7696 7697 partition = self._try_parse(self._parse_partition) 7698 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 7699 return self._parse_as_command(start) 7700 7701 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7702 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 7703 "WITH", "ASYNC", "MODE" 7704 ): 7705 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 7706 else: 7707 mode = None 7708 7709 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 7710 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 7711 7712 properties = self._parse_properties() 7713 return self.expression( 7714 exp.Analyze, 7715 kind=kind, 7716 this=this, 7717 mode=mode, 7718 partition=partition, 7719 
properties=properties, 7720 expression=inner_expression, 7721 options=options, 7722 ) 7723 7724 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 7725 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 7726 this = None 7727 kind = self._prev.text.upper() 7728 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 7729 expressions = [] 7730 7731 if not self._match_text_seq("STATISTICS"): 7732 self.raise_error("Expecting token STATISTICS") 7733 7734 if self._match_text_seq("NOSCAN"): 7735 this = "NOSCAN" 7736 elif self._match(TokenType.FOR): 7737 if self._match_text_seq("ALL", "COLUMNS"): 7738 this = "FOR ALL COLUMNS" 7739 if self._match_texts("COLUMNS"): 7740 this = "FOR COLUMNS" 7741 expressions = self._parse_csv(self._parse_column_reference) 7742 elif self._match_text_seq("SAMPLE"): 7743 sample = self._parse_number() 7744 expressions = [ 7745 self.expression( 7746 exp.AnalyzeSample, 7747 sample=sample, 7748 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 7749 ) 7750 ] 7751 7752 return self.expression( 7753 exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions 7754 ) 7755 7756 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 7757 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 7758 kind = None 7759 this = None 7760 expression: t.Optional[exp.Expression] = None 7761 if self._match_text_seq("REF", "UPDATE"): 7762 kind = "REF" 7763 this = "UPDATE" 7764 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 7765 this = "UPDATE SET DANGLING TO NULL" 7766 elif self._match_text_seq("STRUCTURE"): 7767 kind = "STRUCTURE" 7768 if self._match_text_seq("CASCADE", "FAST"): 7769 this = "CASCADE FAST" 7770 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 7771 ("ONLINE", "OFFLINE") 7772 ): 7773 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 7774 expression = self._parse_into() 7775 7776 return 
self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression) 7777 7778 def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]: 7779 this = self._prev.text.upper() 7780 if self._match_text_seq("COLUMNS"): 7781 return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}") 7782 return None 7783 7784 def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]: 7785 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 7786 if self._match_text_seq("STATISTICS"): 7787 return self.expression(exp.AnalyzeDelete, kind=kind) 7788 return None 7789 7790 def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]: 7791 if self._match_text_seq("CHAINED", "ROWS"): 7792 return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into()) 7793 return None 7794 7795 # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html 7796 def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram: 7797 this = self._prev.text.upper() 7798 expression: t.Optional[exp.Expression] = None 7799 expressions = [] 7800 update_options = None 7801 7802 if self._match_text_seq("HISTOGRAM", "ON"): 7803 expressions = self._parse_csv(self._parse_column_reference) 7804 with_expressions = [] 7805 while self._match(TokenType.WITH): 7806 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 7807 if self._match_texts(("SYNC", "ASYNC")): 7808 if self._match_text_seq("MODE", advance=False): 7809 with_expressions.append(f"{self._prev.text.upper()} MODE") 7810 self._advance() 7811 else: 7812 buckets = self._parse_number() 7813 if self._match_text_seq("BUCKETS"): 7814 with_expressions.append(f"{buckets} BUCKETS") 7815 if with_expressions: 7816 expression = self.expression(exp.AnalyzeWith, expressions=with_expressions) 7817 7818 if self._match_texts(("MANUAL", "AUTO")) and self._match( 7819 TokenType.UPDATE, advance=False 7820 ): 7821 update_options = self._prev.text.upper() 7822 
self._advance() 7823 elif self._match_text_seq("USING", "DATA"): 7824 expression = self.expression(exp.UsingData, this=self._parse_string()) 7825 7826 return self.expression( 7827 exp.AnalyzeHistogram, 7828 this=this, 7829 expressions=expressions, 7830 expression=expression, 7831 update_options=update_options, 7832 ) 7833 7834 def _parse_merge(self) -> exp.Merge: 7835 self._match(TokenType.INTO) 7836 target = self._parse_table() 7837 7838 if target and self._match(TokenType.ALIAS, advance=False): 7839 target.set("alias", self._parse_table_alias()) 7840 7841 self._match(TokenType.USING) 7842 using = self._parse_table() 7843 7844 self._match(TokenType.ON) 7845 on = self._parse_assignment() 7846 7847 return self.expression( 7848 exp.Merge, 7849 this=target, 7850 using=using, 7851 on=on, 7852 whens=self._parse_when_matched(), 7853 returning=self._parse_returning(), 7854 ) 7855 7856 def _parse_when_matched(self) -> exp.Whens: 7857 whens = [] 7858 7859 while self._match(TokenType.WHEN): 7860 matched = not self._match(TokenType.NOT) 7861 self._match_text_seq("MATCHED") 7862 source = ( 7863 False 7864 if self._match_text_seq("BY", "TARGET") 7865 else self._match_text_seq("BY", "SOURCE") 7866 ) 7867 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7868 7869 self._match(TokenType.THEN) 7870 7871 if self._match(TokenType.INSERT): 7872 this = self._parse_star() 7873 if this: 7874 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7875 else: 7876 then = self.expression( 7877 exp.Insert, 7878 this=exp.var("ROW") 7879 if self._match_text_seq("ROW") 7880 else self._parse_value(values=False), 7881 expression=self._match_text_seq("VALUES") and self._parse_value(), 7882 ) 7883 elif self._match(TokenType.UPDATE): 7884 expressions = self._parse_star() 7885 if expressions: 7886 then = self.expression(exp.Update, expressions=expressions) 7887 else: 7888 then = self.expression( 7889 exp.Update, 7890 expressions=self._match(TokenType.SET) 
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                # e.g. DO NOTHING and other conflict actions
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via the dialect's SHOW_PARSERS trie; unknown forms become exp.Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse one `name = value` / `name TO value` SET item; returns None (after
        rewinding the cursor) when no assignment is found at the current position.
        """
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        # Bare identifiers on the right-hand side are treated as plain variables
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL|SESSION] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse a single SET item, dispatching through SET_PARSERS when possible."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse SET/UNSET; falls back to exp.Command when tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Match a (possibly multi-keyword) option from `options` and return it as a Var.

        `options` maps a leading keyword to its allowed continuation sequences; an
        empty sequence means the keyword stands alone. On no match: raise (default)
        or rewind and return None when raise_unmatched is False.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; `continuations is None` means the keyword
            # itself was unknown
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume all remaining tokens and wrap the raw SQL text in exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        # Split the leading keyword off from the rest of the statement text
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a dictionary property `this(kind(key value, ...))` (e.g. ClickHouse)."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                # Stop once neither a key nor a value could be parsed
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)
self._match_r_paren() 8018 8019 return self.expression( 8020 exp.DictProperty, 8021 this=this, 8022 kind=kind.this if kind else None, 8023 settings=settings, 8024 ) 8025 8026 def _parse_dict_range(self, this: str) -> exp.DictRange: 8027 self._match_l_paren() 8028 has_min = self._match_text_seq("MIN") 8029 if has_min: 8030 min = self._parse_var() or self._parse_primary() 8031 self._match_text_seq("MAX") 8032 max = self._parse_var() or self._parse_primary() 8033 else: 8034 max = self._parse_var() or self._parse_primary() 8035 min = exp.Literal.number(0) 8036 self._match_r_paren() 8037 return self.expression(exp.DictRange, this=this, min=min, max=max) 8038 8039 def _parse_comprehension( 8040 self, this: t.Optional[exp.Expression] 8041 ) -> t.Optional[exp.Comprehension]: 8042 index = self._index 8043 expression = self._parse_column() 8044 if not self._match(TokenType.IN): 8045 self._retreat(index - 1) 8046 return None 8047 iterator = self._parse_column() 8048 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8049 return self.expression( 8050 exp.Comprehension, 8051 this=this, 8052 expression=expression, 8053 iterator=iterator, 8054 condition=condition, 8055 ) 8056 8057 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8058 if self._match(TokenType.HEREDOC_STRING): 8059 return self.expression(exp.Heredoc, this=self._prev.text) 8060 8061 if not self._match_text_seq("$"): 8062 return None 8063 8064 tags = ["$"] 8065 tag_text = None 8066 8067 if self._is_connected(): 8068 self._advance() 8069 tags.append(self._prev.text.upper()) 8070 else: 8071 self.raise_error("No closing $ found") 8072 8073 if tags[-1] != "$": 8074 if self._is_connected() and self._match_text_seq("$"): 8075 tag_text = tags[-1] 8076 tags.append("$") 8077 else: 8078 self.raise_error("No closing $ found") 8079 8080 heredoc_start = self._curr 8081 8082 while self._curr: 8083 if self._match_text_seq(*tags, advance=False): 8084 this = self._find_sql(heredoc_start, self._prev) 8085 
self._advance(len(tags)) 8086 return self.expression(exp.Heredoc, this=this, tag=tag_text) 8087 8088 self._advance() 8089 8090 self.raise_error(f"No closing {''.join(tags)} found") 8091 return None 8092 8093 def _find_parser( 8094 self, parsers: t.Dict[str, t.Callable], trie: t.Dict 8095 ) -> t.Optional[t.Callable]: 8096 if not self._curr: 8097 return None 8098 8099 index = self._index 8100 this = [] 8101 while True: 8102 # The current token might be multiple words 8103 curr = self._curr.text.upper() 8104 key = curr.split(" ") 8105 this.append(curr) 8106 8107 self._advance() 8108 result, trie = in_trie(trie, key) 8109 if result == TrieResult.FAILED: 8110 break 8111 8112 if result == TrieResult.EXISTS: 8113 subparser = parsers[" ".join(this)] 8114 return subparser 8115 8116 self._retreat(index) 8117 return None 8118 8119 def _match(self, token_type, advance=True, expression=None): 8120 if not self._curr: 8121 return None 8122 8123 if self._curr.token_type == token_type: 8124 if advance: 8125 self._advance() 8126 self._add_comments(expression) 8127 return True 8128 8129 return None 8130 8131 def _match_set(self, types, advance=True): 8132 if not self._curr: 8133 return None 8134 8135 if self._curr.token_type in types: 8136 if advance: 8137 self._advance() 8138 return True 8139 8140 return None 8141 8142 def _match_pair(self, token_type_a, token_type_b, advance=True): 8143 if not self._curr or not self._next: 8144 return None 8145 8146 if self._curr.token_type == token_type_a and self._next.token_type == token_type_b: 8147 if advance: 8148 self._advance(2) 8149 return True 8150 8151 return None 8152 8153 def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8154 if not self._match(TokenType.L_PAREN, expression=expression): 8155 self.raise_error("Expecting (") 8156 8157 def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None: 8158 if not self._match(TokenType.R_PAREN, expression=expression): 8159 
self.raise_error("Expecting )") 8160 8161 def _match_texts(self, texts, advance=True): 8162 if ( 8163 self._curr 8164 and self._curr.token_type != TokenType.STRING 8165 and self._curr.text.upper() in texts 8166 ): 8167 if advance: 8168 self._advance() 8169 return True 8170 return None 8171 8172 def _match_text_seq(self, *texts, advance=True): 8173 index = self._index 8174 for text in texts: 8175 if ( 8176 self._curr 8177 and self._curr.token_type != TokenType.STRING 8178 and self._curr.text.upper() == text 8179 ): 8180 self._advance() 8181 else: 8182 self._retreat(index) 8183 return None 8184 8185 if not advance: 8186 self._retreat(index) 8187 8188 return True 8189 8190 def _replace_lambda( 8191 self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression] 8192 ) -> t.Optional[exp.Expression]: 8193 if not node: 8194 return node 8195 8196 lambda_types = {e.name: e.args.get("to") or False for e in expressions} 8197 8198 for column in node.find_all(exp.Column): 8199 typ = lambda_types.get(column.parts[0].name) 8200 if typ is not None: 8201 dot_or_id = column.to_dot() if column.table else column.this 8202 8203 if typ: 8204 dot_or_id = self.expression( 8205 exp.Cast, 8206 this=dot_or_id, 8207 to=typ, 8208 ) 8209 8210 parent = column.parent 8211 8212 while isinstance(parent, exp.Dot): 8213 if not isinstance(parent.parent, exp.Dot): 8214 parent.replace(dot_or_id) 8215 break 8216 parent = parent.parent 8217 else: 8218 if column is node: 8219 node = dot_or_id 8220 else: 8221 column.replace(dot_or_id) 8222 return node 8223 8224 def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression: 8225 start = self._prev 8226 8227 # Not to be confused with TRUNCATE(number, decimals) function call 8228 if self._match(TokenType.L_PAREN): 8229 self._retreat(self._index - 2) 8230 return self._parse_function() 8231 8232 # Clickhouse supports TRUNCATE DATABASE as well 8233 is_database = self._match(TokenType.DATABASE) 8234 8235 
        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        """Parse an ordered expression optionally followed by `WITH <operator>`."""
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        """Parse `[=] ( option [option ...] )`; raises on an unparseable option."""
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        """Parse COPY parameters up to the closing paren; separator handling depends
        on the dialect's COPY_PARAMS_ARE_CSV setting.
        """
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        """Parse COPY credential clauses (Snowflake / Redshift style)."""
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Hook for dialects to customize how COPY file locations are parsed
        return self._parse_field()

    def _parse_copy(self) -> exp.Copy | exp.Command:
        """Parse COPY INTO ... FROM/TO ...; falls back to exp.Command on leftovers."""
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        # kind is True for COPY ... FROM, False for COPY ... TO
        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(string [, form])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR(value [, decimals]) [TO unit]."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` and its modifiers (EXCEPT/EXCLUDE, REPLACE, RENAME) or COLUMNS(...) unpacking."""
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        """Parse one GRANT privilege, possibly multi-word, with an optional column list."""
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a grantee: `[ROLE|GROUP] <identifier>`."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        """Parse the privilege list, object kind and securable shared by GRANT/REVOKE."""
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        """Parse GRANT ... ON ... TO ...; falls back to exp.Command on leftovers."""
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        """Parse REVOKE ... ON ... FROM ...; falls back to exp.Command on leftovers."""
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        """Parse OVERLAY(string PLACING repl FROM pos [FOR len])."""
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and
self._parse_bitwise(), 8528 }, 8529 ) 8530 8531 def _parse_format_name(self) -> exp.Property: 8532 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8533 # for FILE_FORMAT = <format_name> 8534 return self.expression( 8535 exp.Property, 8536 this=exp.var("FORMAT_NAME"), 8537 value=self._parse_string() or self._parse_table_parts(), 8538 ) 8539 8540 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8541 args: t.List[exp.Expression] = [] 8542 8543 if self._match(TokenType.DISTINCT): 8544 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8545 self._match(TokenType.COMMA) 8546 8547 args.extend(self._parse_csv(self._parse_assignment)) 8548 8549 return self.expression( 8550 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8551 ) 8552 8553 def _identifier_expression( 8554 self, token: t.Optional[Token] = None, **kwargs: t.Any 8555 ) -> exp.Identifier: 8556 token = token or self._prev 8557 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8558 expression.update_positions(token) 8559 return expression 8560 8561 def _build_pipe_cte( 8562 self, 8563 query: exp.Query, 8564 expressions: t.List[exp.Expression], 8565 alias_cte: t.Optional[exp.TableAlias] = None, 8566 ) -> exp.Select: 8567 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8568 if alias_cte: 8569 new_cte = alias_cte 8570 else: 8571 self._pipe_cte_counter += 1 8572 new_cte = f"__tmp{self._pipe_cte_counter}" 8573 8574 with_ = query.args.get("with") 8575 ctes = with_.pop() if with_ else None 8576 8577 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8578 if ctes: 8579 new_select.set("with", ctes) 8580 8581 return new_select.with_(new_cte, as_=query, copy=False) 8582 8583 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8584 select = self._parse_select(consume_pipe=False) 8585 if not select: 8586 return query 8587 8588 
        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )

    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        """Handle `|> LIMIT ... [OFFSET ...]`: keep the tightest limit, accumulate offsets."""
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            # Only tighten: a larger existing limit is replaced by the smaller new one
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        """Parse one AGGREGATE field: expression [alias] [ASC|DESC]."""
        this = self._parse_assignment()
        # Don't swallow a following "GROUP AND ..." as an alias
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        """Attach AGGREGATE projections (and optional GROUP BY / ORDER BY) to `query`."""
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    # Order by the alias, not the full aliased expression
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Handle `|> AGGREGATE ... [GROUP BY ...|GROUP AND ORDER BY ...]`."""
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> UNION/EXCEPT/INTERSECT (...) [, (...)]`; None when no set op follows."""
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        # Detach `query` from the parsed set op; it is re-wrapped as a CTE below
        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle `|> JOIN ...`; returns None when no join follows."""
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        """Handle `|> PIVOT ...` / `|> UNPIVOT ...` by attaching pivots to the FROM table."""
        pivots = self._parse_pivots()
        if not pivots:
return query 8702 8703 from_ = query.args.get("from") 8704 if from_: 8705 from_.this.set("pivots", pivots) 8706 8707 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8708 8709 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8710 self._match_text_seq("EXTEND") 8711 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8712 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8713 8714 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8715 sample = self._parse_table_sample() 8716 8717 with_ = query.args.get("with") 8718 if with_: 8719 with_.expressions[-1].this.set("sample", sample) 8720 else: 8721 query.set("sample", sample) 8722 8723 return query 8724 8725 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8726 if isinstance(query, exp.Subquery): 8727 query = exp.select("*").from_(query, copy=False) 8728 8729 if not query.args.get("from"): 8730 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8731 8732 while self._match(TokenType.PIPE_GT): 8733 start = self._curr 8734 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8735 if not parser: 8736 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8737 # keywords, making it tricky to disambiguate them without lookahead. The approach 8738 # here is to try and parse a set operation and if that fails, then try to parse a 8739 # join operator. If that fails as well, then the operator is not supported. 
8740 parsed_query = self._parse_pipe_syntax_set_operator(query) 8741 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8742 if not parsed_query: 8743 self._retreat(start) 8744 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8745 break 8746 query = parsed_query 8747 else: 8748 query = parser(self, query) 8749 8750 return query 8751 8752 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8753 vars = self._parse_csv(self._parse_id_var) 8754 if not vars: 8755 return None 8756 8757 return self.expression( 8758 exp.DeclareItem, 8759 this=vars, 8760 kind=self._parse_types(), 8761 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8762 ) 8763 8764 def _parse_declare(self) -> exp.Declare | exp.Command: 8765 start = self._prev 8766 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8767 8768 if not expressions or self._curr: 8769 return self._parse_as_command(start) 8770 8771 return self.expression(exp.Declare, expressions=expressions) 8772 8773 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8774 exp_class = exp.Cast if strict else exp.TryCast 8775 8776 if exp_class == exp.TryCast: 8777 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8778 8779 return self.expression(exp_class, **kwargs) 8780 8781 def _parse_json_value(self) -> exp.JSONValue: 8782 this = self._parse_bitwise() 8783 self._match(TokenType.COMMA) 8784 path = self._parse_bitwise() 8785 8786 returning = self._match(TokenType.RETURNING) and self._parse_type() 8787 8788 return self.expression( 8789 exp.JSONValue, 8790 this=this, 8791 path=self.dialect.to_json_path(path), 8792 returning=returning, 8793 on_condition=self._parse_on_condition(), 8794 ) 8795 8796 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8797 def concat_exprs( 8798 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8799 ) -> exp.Expression: 8800 if isinstance(node, exp.Distinct) and 
len(node.expressions) > 1: 8801 concat_exprs = [ 8802 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8803 ] 8804 node.set("expressions", concat_exprs) 8805 return node 8806 if len(exprs) == 1: 8807 return exprs[0] 8808 return self.expression(exp.Concat, expressions=args, safe=True) 8809 8810 args = self._parse_csv(self._parse_lambda) 8811 8812 if args: 8813 order = args[-1] if isinstance(args[-1], exp.Order) else None 8814 8815 if order: 8816 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8817 # remove 'expr' from exp.Order and add it back to args 8818 args[-1] = order.this 8819 order.set("this", concat_exprs(order.this, args)) 8820 8821 this = order or concat_exprs(args[0], args) 8822 else: 8823 this = None 8824 8825 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8826 8827 return self.expression(exp.GroupConcat, this=this, separator=separator)
def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    """Build a StarMap for a lone star argument, otherwise a VarMap whose keys and
    values come from the alternating key/value argument list."""
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = [args[i] for i in range(0, len(args), 2)]
    values = [args[i + 1] for i in range(0, len(args), 2)]

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    """Return a range parser that builds `expr_type` from the current operand and the
    next bitwise expression, optionally with the operands swapped, and then parses a
    trailing ESCAPE clause."""

    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        rhs = self._parse_bitwise()
        left, right = (rhs, this) if reverse_args else (this, rhs)
        return self._parse_escape(self.expression(expr_type, this=left, expression=right))

    return _parse_binary_range
def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    """Build a Log expression, honoring the dialect's base/expression argument order;
    a single-argument LOG becomes Ln for dialects where LOG defaults to natural log."""
    # Default argument order is base, expression
    base = seq_get(args, 0)
    value = seq_get(args, 1)

    if not value:
        if dialect.parser_class.LOG_DEFAULTS_TO_LN:
            return exp.Ln(this=base)
        return exp.Log(this=base)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=base, expression=value)

    # Dialect puts the expression first, so swap the operands
    return exp.Log(this=value, expression=base)
def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    """Return a builder that constructs `expr_type` with its JSON path converted via
    the dialect; extra arguments are kept only for JSONExtract."""

    def _builder(args: t.List, dialect: Dialect) -> E:
        node = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )

        # Only JSONExtract supports trailing arguments beyond the path
        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        return node

    return _builder
def build_mod(args: t.List) -> exp.Mod:
    """Build a Mod expression, wrapping binary operands in parens so the `%` rendering
    preserves precedence, e.g. MOD(a + 1, 7) -> (a + 1) % 7."""

    def _maybe_paren(node: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        return exp.Paren(this=node) if isinstance(node, exp.Binary) else node

    return exp.Mod(
        this=_maybe_paren(seq_get(args, 0)),
        expression=_maybe_paren(seq_get(args, 1)),
    )
def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    """Build an array-constructor node, recording whether bracket notation was used
    for dialects that distinguish ARRAY[...] from ARRAY(...)."""
    node = exp_class(expressions=args)

    if dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS and exp_class == exp.Array:
        node.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return node
def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    """Build ConvertTimezone; the two-argument form supplies the source timezone from
    `default_source_tz` (when given), otherwise all args are passed through."""
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or 
exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: 
exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 
361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 
TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 
TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *CREATABLES, 574 *SUBQUERY_PREDICATES, 575 *TYPE_TOKENS, 576 *NO_PAREN_FUNCTIONS, 577 } 578 ID_VAR_TOKENS.remove(TokenType.UNION) 579 580 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 581 TokenType.ANTI, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 
TokenType.INDEX, 623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.UTC_DATE, 647 TokenType.UTC_TIME, 648 TokenType.UTC_TIMESTAMP, 649 TokenType.WINDOW, 650 TokenType.XOR, 651 *TYPE_TOKENS, 652 *SUBQUERY_PREDICATES, 653 } 654 655 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.AND: exp.And, 657 } 658 659 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.COLON_EQ: exp.PropertyEQ, 661 } 662 663 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 664 TokenType.OR: exp.Or, 665 } 666 667 EQUALITY = { 668 TokenType.EQ: exp.EQ, 669 TokenType.NEQ: exp.NEQ, 670 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 671 } 672 673 COMPARISON = { 674 TokenType.GT: exp.GT, 675 TokenType.GTE: exp.GTE, 676 TokenType.LT: exp.LT, 677 TokenType.LTE: exp.LTE, 678 } 679 680 BITWISE = { 681 TokenType.AMP: exp.BitwiseAnd, 682 TokenType.CARET: exp.BitwiseXor, 683 TokenType.PIPE: exp.BitwiseOr, 684 } 685 686 TERM = { 687 TokenType.DASH: exp.Sub, 688 TokenType.PLUS: exp.Add, 689 TokenType.MOD: exp.Mod, 690 TokenType.COLLATE: exp.Collate, 691 } 692 693 FACTOR = { 694 TokenType.DIV: exp.IntDiv, 695 TokenType.LR_ARROW: exp.Distance, 696 TokenType.SLASH: exp.Div, 697 TokenType.STAR: exp.Mul, 698 } 699 700 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 701 702 TIMES = { 703 TokenType.TIME, 704 TokenType.TIMETZ, 705 } 706 707 TIMESTAMPS = { 708 TokenType.TIMESTAMP, 709 TokenType.TIMESTAMPNTZ, 710 TokenType.TIMESTAMPTZ, 711 TokenType.TIMESTAMPLTZ, 712 *TIMES, 713 } 714 715 SET_OPERATIONS = 
{ 716 TokenType.UNION, 717 TokenType.INTERSECT, 718 TokenType.EXCEPT, 719 } 720 721 JOIN_METHODS = { 722 TokenType.ASOF, 723 TokenType.NATURAL, 724 TokenType.POSITIONAL, 725 } 726 727 JOIN_SIDES = { 728 TokenType.LEFT, 729 TokenType.RIGHT, 730 TokenType.FULL, 731 } 732 733 JOIN_KINDS = { 734 TokenType.ANTI, 735 TokenType.CROSS, 736 TokenType.INNER, 737 TokenType.OUTER, 738 TokenType.SEMI, 739 TokenType.STRAIGHT_JOIN, 740 } 741 742 JOIN_HINTS: t.Set[str] = set() 743 744 LAMBDAS = { 745 TokenType.ARROW: lambda self, expressions: self.expression( 746 exp.Lambda, 747 this=self._replace_lambda( 748 self._parse_assignment(), 749 expressions, 750 ), 751 expressions=expressions, 752 ), 753 TokenType.FARROW: lambda self, expressions: self.expression( 754 exp.Kwarg, 755 this=exp.var(expressions[0].name), 756 expression=self._parse_assignment(), 757 ), 758 } 759 760 COLUMN_OPERATORS = { 761 TokenType.DOT: None, 762 TokenType.DOTCOLON: lambda self, this, to: self.expression( 763 exp.JSONCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.DCOLON: lambda self, this, to: self.build_cast( 768 strict=self.STRICT_CAST, this=this, to=to 769 ), 770 TokenType.ARROW: lambda self, this, path: self.expression( 771 exp.JSONExtract, 772 this=this, 773 expression=self.dialect.to_json_path(path), 774 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 775 ), 776 TokenType.DARROW: lambda self, this, path: self.expression( 777 exp.JSONExtractScalar, 778 this=this, 779 expression=self.dialect.to_json_path(path), 780 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 781 ), 782 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 783 exp.JSONBExtract, 784 this=this, 785 expression=path, 786 ), 787 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 788 exp.JSONBExtractScalar, 789 this=this, 790 expression=path, 791 ), 792 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 793 exp.JSONBContains, 794 this=this, 795 expression=key, 796 ), 797 } 798 799 
CAST_COLUMN_OPERATORS = { 800 TokenType.DOTCOLON, 801 TokenType.DCOLON, 802 } 803 804 EXPRESSION_PARSERS = { 805 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 806 exp.Column: lambda self: self._parse_column(), 807 exp.Condition: lambda self: self._parse_assignment(), 808 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 809 exp.Expression: lambda self: self._parse_expression(), 810 exp.From: lambda self: self._parse_from(joins=True), 811 exp.Group: lambda self: self._parse_group(), 812 exp.Having: lambda self: self._parse_having(), 813 exp.Hint: lambda self: self._parse_hint_body(), 814 exp.Identifier: lambda self: self._parse_id_var(), 815 exp.Join: lambda self: self._parse_join(), 816 exp.Lambda: lambda self: self._parse_lambda(), 817 exp.Lateral: lambda self: self._parse_lateral(), 818 exp.Limit: lambda self: self._parse_limit(), 819 exp.Offset: lambda self: self._parse_offset(), 820 exp.Order: lambda self: self._parse_order(), 821 exp.Ordered: lambda self: self._parse_ordered(), 822 exp.Properties: lambda self: self._parse_properties(), 823 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 824 exp.Qualify: lambda self: self._parse_qualify(), 825 exp.Returning: lambda self: self._parse_returning(), 826 exp.Select: lambda self: self._parse_select(), 827 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 828 exp.Table: lambda self: self._parse_table_parts(), 829 exp.TableAlias: lambda self: self._parse_table_alias(), 830 exp.Tuple: lambda self: self._parse_value(values=False), 831 exp.Whens: lambda self: self._parse_when_matched(), 832 exp.Where: lambda self: self._parse_where(), 833 exp.Window: lambda self: self._parse_named_window(), 834 exp.With: lambda self: self._parse_with(), 835 "JOIN_TYPE": lambda self: self._parse_join_parts(), 836 } 837 838 STATEMENT_PARSERS = { 839 TokenType.ALTER: lambda self: self._parse_alter(), 840 TokenType.ANALYZE: lambda 
self: self._parse_analyze(), 841 TokenType.BEGIN: lambda self: self._parse_transaction(), 842 TokenType.CACHE: lambda self: self._parse_cache(), 843 TokenType.COMMENT: lambda self: self._parse_comment(), 844 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 845 TokenType.COPY: lambda self: self._parse_copy(), 846 TokenType.CREATE: lambda self: self._parse_create(), 847 TokenType.DELETE: lambda self: self._parse_delete(), 848 TokenType.DESC: lambda self: self._parse_describe(), 849 TokenType.DESCRIBE: lambda self: self._parse_describe(), 850 TokenType.DROP: lambda self: self._parse_drop(), 851 TokenType.GRANT: lambda self: self._parse_grant(), 852 TokenType.REVOKE: lambda self: self._parse_revoke(), 853 TokenType.INSERT: lambda self: self._parse_insert(), 854 TokenType.KILL: lambda self: self._parse_kill(), 855 TokenType.LOAD: lambda self: self._parse_load(), 856 TokenType.MERGE: lambda self: self._parse_merge(), 857 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 858 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 859 TokenType.REFRESH: lambda self: self._parse_refresh(), 860 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 861 TokenType.SET: lambda self: self._parse_set(), 862 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 863 TokenType.UNCACHE: lambda self: self._parse_uncache(), 864 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 865 TokenType.UPDATE: lambda self: self._parse_update(), 866 TokenType.USE: lambda self: self._parse_use(), 867 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 868 } 869 870 UNARY_PARSERS = { 871 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 872 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 873 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 874 TokenType.DASH: lambda self: 
self.expression(exp.Neg, this=self._parse_unary()), 875 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 876 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 877 } 878 879 STRING_PARSERS = { 880 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 881 exp.RawString, this=token.text 882 ), 883 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 884 exp.National, this=token.text 885 ), 886 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 887 TokenType.STRING: lambda self, token: self.expression( 888 exp.Literal, this=token.text, is_string=True 889 ), 890 TokenType.UNICODE_STRING: lambda self, token: self.expression( 891 exp.UnicodeString, 892 this=token.text, 893 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 894 ), 895 } 896 897 NUMERIC_PARSERS = { 898 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 899 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 900 TokenType.HEX_STRING: lambda self, token: self.expression( 901 exp.HexString, 902 this=token.text, 903 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 904 ), 905 TokenType.NUMBER: lambda self, token: self.expression( 906 exp.Literal, this=token.text, is_string=False 907 ), 908 } 909 910 PRIMARY_PARSERS = { 911 **STRING_PARSERS, 912 **NUMERIC_PARSERS, 913 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 914 TokenType.NULL: lambda self, _: self.expression(exp.Null), 915 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 916 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 917 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 918 TokenType.STAR: lambda self, _: self._parse_star_ops(), 919 } 920 921 PLACEHOLDER_PARSERS = { 922 TokenType.PLACEHOLDER: lambda self: 
self.expression(exp.Placeholder), 923 TokenType.PARAMETER: lambda self: self._parse_parameter(), 924 TokenType.COLON: lambda self: ( 925 self.expression(exp.Placeholder, this=self._prev.text) 926 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 927 else None 928 ), 929 } 930 931 RANGE_PARSERS = { 932 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 933 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 934 TokenType.GLOB: binary_range_parser(exp.Glob), 935 TokenType.ILIKE: binary_range_parser(exp.ILike), 936 TokenType.IN: lambda self, this: self._parse_in(this), 937 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 938 TokenType.IS: lambda self, this: self._parse_is(this), 939 TokenType.LIKE: binary_range_parser(exp.Like), 940 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 941 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 942 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 943 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 944 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 945 } 946 947 PIPE_SYNTAX_TRANSFORM_PARSERS = { 948 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 949 "AS": lambda self, query: self._build_pipe_cte( 950 query, [exp.Star()], self._parse_table_alias() 951 ), 952 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 953 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 954 "ORDER BY": lambda self, query: query.order_by( 955 self._parse_order(), append=False, copy=False 956 ), 957 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 958 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 959 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 960 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 961 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 962 } 963 964 PROPERTY_PARSERS: 
t.Dict[str, t.Callable] = {
    # NOTE(review): the attribute name of this mapping is declared just above
    # this chunk. Each key is a property keyword and each value is a callable
    # that receives the parser and returns the parsed property expression.
    "ALLOWED_VALUES": lambda self: self.expression(
        exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
    ),
    "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
    "AUTO": lambda self: self._parse_auto_property(),
    "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
    "BACKUP": lambda self: self.expression(
        exp.BackupProperty, this=self._parse_var(any_token=True)
    ),
    "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
    "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHECKSUM": lambda self: self._parse_checksum(),
    "CLUSTER BY": lambda self: self._parse_cluster(),
    "CLUSTERED": lambda self: self._parse_clustered_by(),
    "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
        exp.CollateProperty, **kwargs
    ),
    "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
    "CONTAINS": lambda self: self._parse_contains_property(),
    "COPY": lambda self: self._parse_copy_property(),
    "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
    "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
    "DEFINER": lambda self: self._parse_definer(),
    # DETERMINISTIC is treated as a synonym of the IMMUTABLE stability level.
    "DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "DISTRIBUTED": lambda self: self._parse_distributed_property(),
    "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
    "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
    "DISTKEY": lambda self: self._parse_distkey(),
    "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
    "EMPTY": lambda self: self.expression(exp.EmptyProperty),
    "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
    # NOTE(review): exp.EnviromentProperty is spelled this way in the
    # expressions module; keep it consistent with that class name.
    "ENVIRONMENT": lambda self: self.expression(
        exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
    ),
    "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
    "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
    "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
    "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "FREESPACE": lambda self: self._parse_freespace(),
    "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
    "HEAP": lambda self: self.expression(exp.HeapProperty),
    "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
    "IMMUTABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
    ),
    "INHERITS": lambda self: self.expression(
        exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
    ),
    "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
    "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MODIFIES": lambda self: self._parse_modifies_property(),
    "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
    "PARTITION": lambda self: self._parse_partitioned_of(),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "READS": lambda self: self._parse_reads_property(),
    "REMOTE": lambda self: self._parse_remote_with_connection(),
    "RETURNS": lambda self: self._parse_returns(),
    "STRICT": lambda self: self.expression(exp.StrictProperty),
    "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    "SAMPLE": lambda self: self.expression(
        exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
    ),
    "SECURE": lambda self: self.expression(exp.SecureProperty),
    "SECURITY": lambda self: self._parse_security(),
    "SET": lambda self: self.expression(exp.SetProperty, multi=False),
    "SETTINGS": lambda self: self._parse_settings_property(),
    "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty, this=exp.Literal.string("STABLE")
    ),
    "STORED": lambda self: self._parse_stored(),
    "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
    "TO": lambda self: self._parse_to_table(),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
    "TRANSFORM": lambda self: self.expression(
        exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
    ),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}

# Maps a column-constraint keyword to a callable that parses the
# corresponding constraint expression.
CONSTRAINT_PARSERS = {
    "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
    "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
    "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
    "CHARACTER SET": lambda self: self.expression(
        exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
    ),
    "CHECK": lambda self: self.expression(
        exp.CheckColumnConstraint,
        this=self._parse_wrapped(self._parse_assignment),
        enforced=self._match_text_seq("ENFORCED"),
    ),
    "COLLATE": lambda self: self.expression(
        exp.CollateColumnConstraint,
        this=self._parse_identifier() or self._parse_column(),
    ),
    "COMMENT": lambda self: self.expression(
        exp.CommentColumnConstraint, this=self._parse_string()
    ),
    "COMPRESS": lambda self: self._parse_compress(),
    "CLUSTERED": lambda self: self.expression(
        exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "NONCLUSTERED": lambda self: self.expression(
        exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
    ),
    "DEFAULT": lambda self: self.expression(
        exp.DefaultColumnConstraint, this=self._parse_bitwise()
    ),
    "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
    "EPHEMERAL": lambda self: self.expression(
        exp.EphemeralColumnConstraint, this=self._parse_bitwise()
    ),
    "EXCLUDE": lambda self: self.expression(
        exp.ExcludeColumnConstraint, this=self._parse_index_params()
    ),
    "FOREIGN KEY": lambda self: self._parse_foreign_key(),
    "FORMAT": lambda self: self.expression(
        exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
    ),
    "GENERATED": lambda self: self._parse_generated_as_identity(),
    "IDENTITY": lambda self: self._parse_auto_increment(),
    "INLINE": lambda self: self._parse_inline(),
    "LIKE": lambda self: self._parse_create_like(),
    "NOT": lambda self: self._parse_not_constraint(),
    "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
    # ON UPDATE <function> becomes OnUpdateColumnConstraint; a bare ON <id>
    # falls through to OnProperty.
    "ON": lambda self: (
        self._match(TokenType.UPDATE)
        and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
    )
    or self.expression(exp.OnProperty, this=self._parse_id_var()),
    "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
    "PERIOD": lambda self: self._parse_period_for_system_time(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(),
    "REFERENCES": lambda self: self._parse_references(match=False),
    "TITLE": lambda self: self.expression(
        exp.TitleColumnConstraint, this=self._parse_var_or_string()
    ),
    "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
    "UNIQUE": lambda self: self._parse_unique(),
    "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
    "WATERMARK": lambda self: self.expression(
        exp.WatermarkColumnConstraint,
        this=self._match(TokenType.FOR) and self._parse_column(),
        expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
    ),
    "WITH": lambda self: self.expression(
        exp.Properties, expressions=self._parse_wrapped_properties()
    ),
    "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
}

def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
    """
    Parses the BUCKET(...) / TRUNCATE(...) transforms of a PARTITION BY clause
    into exp.PartitionedByBucket / exp.PartitionByTruncate, normalizing the
    argument order to the Trino-style `(<col>, <literal>)` form.

    Returns None (after retreating one token) when the keyword isn't followed
    by a parenthesized argument list, so it can be re-parsed as an identifier.
    """
    if not self._match(TokenType.L_PAREN, advance=False):
        # Partitioning by bucket or truncate follows the syntax:
        # PARTITION BY (BUCKET(..) | TRUNCATE(..))
        # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
        self._retreat(self._index - 1)
        return None

    # The matched keyword itself (BUCKET vs TRUNCATE) picks the node class.
    klass = (
        exp.PartitionedByBucket
        if self._prev.text.upper() == "BUCKET"
        else exp.PartitionByTruncate
    )

    args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
    this, expression = seq_get(args, 0), seq_get(args, 1)

    if isinstance(this, exp.Literal):
        # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
        # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
        # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
        # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
        #
        # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
        # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
        this, expression = expression, this

    return self.expression(klass, this=this, expression=expression)

# Maps the first keyword after ALTER TABLE <name> to its sub-parser.
ALTER_PARSERS = {
    "ADD": lambda self: self._parse_alter_table_add(),
    "AS": lambda self:
self._parse_select(),
    "ALTER": lambda self: self._parse_alter_table_alter(),
    "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
    "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
    "DROP": lambda self: self._parse_alter_table_drop(),
    "RENAME": lambda self: self._parse_alter_table_rename(),
    "SET": lambda self: self._parse_alter_table_set(),
    "SWAP": lambda self: self.expression(
        exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
    ),
}

# Sub-parsers for the keyword following ALTER ... ALTER.
ALTER_ALTER_PARSERS = {
    "DISTKEY": lambda self: self._parse_alter_diststyle(),
    "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
    "SORTKEY": lambda self: self._parse_alter_sortkey(),
    "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
}

# Constraint keywords that may appear in a schema definition without a name.
SCHEMA_UNNAMED_CONSTRAINTS = {
    "CHECK",
    "EXCLUDE",
    "FOREIGN KEY",
    "LIKE",
    "PERIOD",
    "PRIMARY KEY",
    "UNIQUE",
    "WATERMARK",
    "BUCKET",
    "TRUNCATE",
}

# Function-like keywords parsed without a parenthesized argument list.
NO_PAREN_FUNCTION_PARSERS = {
    "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
    "CASE": lambda self: self._parse_case(),
    "CONNECT_BY_ROOT": lambda self: self.expression(
        exp.ConnectByRoot, this=self._parse_column()
    ),
    "IF": lambda self: self._parse_if(),
}

# Token types that cannot serve as a function name.
INVALID_FUNC_NAME_TOKENS = {
    TokenType.IDENTIFIER,
    TokenType.STRING,
}

FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

# Functions whose argument lists need bespoke (non-CSV) parsing.
FUNCTION_PARSERS = {
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
    },
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
    },
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "OVERLAY": lambda self: self._parse_overlay(),
    "POSITION": lambda self: self._parse_position(),
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    "XMLELEMENT": lambda self: self.expression(
        exp.XMLElement,
        this=self._match_text_seq("NAME") and self._parse_id_var(),
        expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression),
    ),
    "XMLTABLE": lambda self: self._parse_xml_table(),
}

# Maps a clause-introducing token to a ("arg name", parsed clause) pair used
# when collecting query modifiers (WHERE, GROUP BY, LIMIT, ...).
QUERY_MODIFIER_PARSERS = {
    TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
    TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
    TokenType.WHERE: lambda self: ("where", self._parse_where()),
    TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
    TokenType.HAVING: lambda self: ("having", self._parse_having()),
    TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
    TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
    TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
    TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
    # FETCH is parsed into the same "limit" slot as LIMIT.
    TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
    TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
    TokenType.FOR: lambda self: ("locks", self._parse_locks()),
    TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
    TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
    TokenType.CLUSTER_BY: lambda self: (
        "cluster",
        self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
    ),
    TokenType.DISTRIBUTE_BY: lambda self: (
        "distribute",
        self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
    ),
    TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
    TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
    TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
}
QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS)

SET_PARSERS = {
    "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
    "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
    "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
    "TRANSACTION": lambda self: self._parse_set_transaction(),
}

SHOW_PARSERS: t.Dict[str, t.Callable] = {}

TYPE_LITERAL_PARSERS = {
    exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this),
}

TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
# Multi-word option sequences that can follow SET TRANSACTION, per keyword.
TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        # Fixed: was misspelled "UNCOMITTED", which made the standard
        # READ UNCOMMITTED isolation level unparseable.
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}

# ON CONFLICT / OR <action> alternatives; DO takes a follow-up keyword.
CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
    ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
)
CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

# Option keywords accepted by CREATE SEQUENCE.
CREATE_SEQUENCE: OPTIONS_TYPE = {
    "SCALE": ("EXTEND", "NOEXTEND"),
    "SHARD": ("EXTEND", "NOEXTEND"),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    **dict.fromkeys(
        (
            "SESSION",
            "GLOBAL",
            "KEEP",
            "NOKEEP",
            "ORDER",
            "NOORDER",
            "NOCACHE",
            "CYCLE",
            "NOCYCLE",
            "NOMINVALUE",
            "NOMAXVALUE",
            "NOSCALE",
            "NOSHARD",
        ),
        tuple(),
    ),
}

ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

USABLES: OPTIONS_TYPE = dict.fromkeys(
    ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
)

CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
    "TYPE": ("EVOLUTION",),
    **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
}

PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
    "NOT": ("ENFORCED",),
    "MATCH": (
        "FULL",
        "PARTIAL",
        "SIMPLE",
    ),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": (
        "BTREE",
        "HASH",
    ),
    **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
}

# Options of a window frame's EXCLUDE clause.
WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
    "NO": ("OTHERS",),
    "CURRENT": ("ROW",),
    **dict.fromkeys(("GROUP", "TIES"), tuple()),
}

INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

CLONE_KEYWORDS = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS = {TokenType.DISTINCT}

NULL_TOKENS = {TokenType.NULL}

UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

# ODBC datetime escape prefixes mapped to their expression classes.
ODBC_DATETIME_LITERALS = {
    "d": exp.Date,
    "t": exp.Time,
    "ts": exp.Timestamp,
}

ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# The style options for the DESCRIBE statement
DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# The style options for the ANALYZE statement
ANALYZE_STYLES = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

# Sub-parsers keyed on the keyword following ANALYZE.
ANALYZE_EXPRESSION_PARSERS = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

OPERATION_MODIFIERS: t.Set[str] = set()

RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

# Dialect feature flags (overridden by dialect-specific parsers).
STRICT_CAST = True

PREFIXED_PIVOT_COLUMNS = False
IDENTIFY_PIVOT_STRINGS = False

LOG_DEFAULTS_TO_LN = False

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

# Whether the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST = False

# Whether string aliases are supported `SELECT COUNT(*) 'count'`
STRING_ALIASES = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP = True
SET_OP_MODIFIERS = {"order", "limit", "offset"}

# Whether to parse IF statements that aren't followed by a left parenthesis as commands
NO_PAREN_IF_COMMANDS = True

# Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE = False

# Whether the `:` operator is used to extract a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT = False

# Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
# If this is True and '(' is not found, the keyword will be treated as an identifier
VALUES_FOLLOWED_BY_PAREN = True

# Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SUPPORTS_IMPLICIT_UNNEST = False

# Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
INTERVAL_SPANS = True

# Whether a PARTITION clause can follow a table reference
SUPPORTS_PARTITION_SELECTION = False

# Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

# Whether the 'AS' keyword is optional in the CTE definition syntax
OPTIONAL_ALIAS_TOKEN_CTE = True

# Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
ALTER_RENAME_REQUIRES_COLUMN = True

# Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
# In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
# to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
# as BigQuery, where all joins have the same precedence.
JOINS_HAVE_EQUAL_PRECEDENCE = False

# Whether TIMESTAMP <literal> can produce a zone-aware timestamp
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

# Whether map literals support arbitrary expressions as keys.
# When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
# When False, keys are typically restricted to identifiers.
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

# Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
# is true for Snowflake but not for BigQuery which can also process strings
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

# Per-instance parser state; declared in __slots__ so instances don't carry a dict.
__slots__ = (
    "error_level",
    "error_message_context",
    "max_errors",
    "dialect",
    "sql",
    "errors",
    "_tokens",
    "_index",
    "_curr",
    "_next",
    "_prev",
    "_prev_comments",
    "_pipe_cte_counter",
)

# Autofilled
SHOW_TRIE: t.Dict = {}
SET_TRIE: t.Dict = {}

def __init__(
    self,
    error_level: t.Optional[ErrorLevel] = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    dialect: DialectType = None,
):
    """
    Args:
        error_level: How parse errors are handled; defaults to ErrorLevel.IMMEDIATE.
        error_message_context: Number of characters of surrounding SQL shown in error messages.
        max_errors: Maximum number of error messages concatenated when raising.
        dialect: Dialect name/instance resolved via Dialect.get_or_raise.
    """
    # Imported locally - presumably to avoid a circular import at module load.
    from sqlglot.dialects import Dialect

    self.error_level = error_level or ErrorLevel.IMMEDIATE
    self.error_message_context = error_message_context
    self.max_errors = max_errors
    self.dialect = Dialect.get_or_raise(dialect)
    self.reset()

def reset(self):
    """Clears all per-parse state so the instance can be reused."""
    self.sql = ""
    self.errors = []
    self._tokens = []
    self._index = 0
    self._curr = None
    self._next = None
    self._prev = None
    self._prev_comments = None
    self._pipe_cte_counter = 0

def parse(
    self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of the produced syntax trees.
    """
    return self._parse(
        parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
    )

def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    """
    Parses a list of tokens into a given Expression type. If a collection of Expression
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The target Expression.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(expression_type)
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Record which target type this attempt was for, then try the next one.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # Every candidate type failed; raise an aggregate error chained to the last one.
    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from errors[-1]

def _parse(
    self,
    parse_method: t.Callable[[Parser], t.Optional[exp.Expression]],
    raw_tokens: t.List[Token],
    sql: t.Optional[str] = None,
) -> t.List[t.Optional[exp.Expression]]:
    # Splits the token stream on semicolons and runs parse_method once per chunk.
    self.reset()
    self.sql = sql or ""

    total = len(raw_tokens)
    chunks: t.List[t.List[Token]] = [[]]

    for i, token in enumerate(raw_tokens):
        if token.token_type == TokenType.SEMICOLON:
            if token.comments:
                # Keep a comment-bearing semicolon as its own chunk so the
                # comments are not lost.
                chunks.append([token])

            # A trailing semicolon doesn't start a new (empty) chunk.
            if i < total - 1:
                chunks.append([])
        else:
            chunks[-1].append(token)

    expressions = []

    for tokens in chunks:
        self._index = -1
        self._tokens = tokens
        self._advance()

        expressions.append(parse_method(self))

        # Leftover tokens mean the statement wasn't fully consumed.
        if self._index < len(self._tokens):
            self.raise_error("Invalid expression / Unexpected token")

        self.check_errors()

    return expressions

def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    if self.error_level == ErrorLevel.WARN:
        for error in self.errors:
            logger.error(str(error))
    elif self.error_level == ErrorLevel.RAISE and self.errors:
        raise ParseError(
            concat_messages(self.errors, self.max_errors),
            errors=merge_errors(self.errors),
        )

def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
    """
    Appends an error in the list of recorded errors or raises it, depending on the chosen
    error level setting.
    """
    token = token or self._curr or self._prev or Token.string("")
    start = token.start
    end = token.end + 1
    # Surrounding SQL shown in the message, clamped to the configured context size.
    start_context = self.sql[max(start - self.error_message_context, 0) : start]
    highlight = self.sql[start:end]
    end_context = self.sql[end : end + self.error_message_context]

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n"
        f" {start_context}\033[4m{highlight}\033[0m{end_context}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level == ErrorLevel.IMMEDIATE:
        raise error

    self.errors.append(error)

def expression(
    self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
) -> E:
    """
    Creates a new, validated Expression.

    Args:
        exp_class: The expression class to instantiate.
        comments: An optional list of comments to attach to the expression.
        kwargs: The arguments to set for the expression along with their respective values.

    Returns:
        The target expression.
    """
    instance = exp_class(**kwargs)
    # Attach the explicit comments if given, otherwise any pending token comments.
    instance.add_comments(comments) if comments else self._add_comments(instance)
    return self.validate_expression(instance)

def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
    # Attaches any comments buffered from the previous token, then clears them.
    if expression and self._prev_comments:
        expression.add_comments(self._prev_comments)
        self._prev_comments = None

def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
    """
    Validates an Expression, making sure that all its mandatory arguments are set.

    Args:
        expression: The expression to validate.
1764 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1765 1766 Returns: 1767 The validated expression. 1768 """ 1769 if self.error_level != ErrorLevel.IGNORE: 1770 for error_message in expression.error_messages(args): 1771 self.raise_error(error_message) 1772 1773 return expression 1774 1775 def _find_sql(self, start: Token, end: Token) -> str: 1776 return self.sql[start.start : end.end + 1] 1777 1778 def _is_connected(self) -> bool: 1779 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1780 1781 def _advance(self, times: int = 1) -> None: 1782 self._index += times 1783 self._curr = seq_get(self._tokens, self._index) 1784 self._next = seq_get(self._tokens, self._index + 1) 1785 1786 if self._index > 0: 1787 self._prev = self._tokens[self._index - 1] 1788 self._prev_comments = self._prev.comments 1789 else: 1790 self._prev = None 1791 self._prev_comments = None 1792 1793 def _retreat(self, index: int) -> None: 1794 if index != self._index: 1795 self._advance(index - self._index) 1796 1797 def _warn_unsupported(self) -> None: 1798 if len(self._tokens) <= 1: 1799 return 1800 1801 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1802 # interested in emitting a warning for the one being currently processed. 1803 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1804 1805 logger.warning( 1806 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1807 ) 1808 1809 def _parse_command(self) -> exp.Command: 1810 self._warn_unsupported() 1811 return self.expression( 1812 exp.Command, 1813 comments=self._prev_comments, 1814 this=self._prev.text.upper(), 1815 expression=self._parse_string(), 1816 ) 1817 1818 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1819 """ 1820 Attemps to backtrack if a parse function that contains a try/catch internally raises an error. 
    This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
    solve this by setting & resetting the parser state accordingly
    """
    index = self._index
    error_level = self.error_level

    # Force IMMEDIATE so any failure inside parse_method raises right away.
    self.error_level = ErrorLevel.IMMEDIATE
    try:
        this = parse_method()
    except ParseError:
        this = None
    finally:
        # Restore the cursor on failure (or unconditionally when retreat=True)
        # and always restore the caller's error level.
        if not this or retreat:
            self._retreat(index)
        self.error_level = error_level

    return this

def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
    # Parses COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <target> IS <string>.
    start = self._prev
    exists = self._parse_exists() if allow_exists else None

    self._match(TokenType.ON)

    materialized = self._match_text_seq("MATERIALIZED")
    kind = self._match_set(self.CREATABLES) and self._prev
    if not kind:
        # Unknown object kind - fall back to a generic Command.
        return self._parse_as_command(start)

    # The target is parsed according to the kind of object being commented on.
    if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
        this = self._parse_user_defined_function(kind=kind.token_type)
    elif kind.token_type == TokenType.TABLE:
        this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
    elif kind.token_type == TokenType.COLUMN:
        this = self._parse_column()
    else:
        this = self._parse_id_var()

    self._match(TokenType.IS)

    return self.expression(
        exp.Comment,
        this=this,
        kind=kind.text,
        expression=self._parse_string(),
        exists=exists,
        materialized=materialized,
    )

def _parse_to_table(
    self,
) -> exp.ToTableProperty:
    # Parses a (possibly qualified) table reference into a ToTableProperty.
    table = self._parse_table_parts(schema=True)
    return self.expression(exp.ToTableProperty, this=table)

# https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
def _parse_ttl(self) -> exp.Expression:
    def _parse_ttl_action() -> t.Optional[exp.Expression]:
        # One TTL entry:
        # <expr> [DELETE | RECOMPRESS <expr> | TO DISK <string> | TO VOLUME <string>]
        this = self._parse_bitwise()

        if self._match_text_seq("DELETE"):
            return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
        if self._match_text_seq("RECOMPRESS"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
            )
        if self._match_text_seq("TO", "DISK"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
            )
        if self._match_text_seq("TO", "VOLUME"):
            return self.expression(
                exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
            )

        return this

    expressions = self._parse_csv(_parse_ttl_action)
    where = self._parse_where()
    group = self._parse_group()

    aggregates = None
    if group and self._match(TokenType.SET):
        aggregates = self._parse_csv(self._parse_set_item)

    return self.expression(
        exp.MergeTreeTTL,
        expressions=expressions,
        where=where,
        group=group,
        aggregates=aggregates,
    )

def _parse_statement(self) -> t.Optional[exp.Expression]:
    if self._curr is None:
        return None

    if self._match_set(self.STATEMENT_PARSERS):
        # Capture leading comments before the sub-parser consumes more tokens.
        comments = self._prev_comments
        stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
        stmt.add_comments(comments, prepend=True)
        return stmt

    if self._match_set(self.dialect.tokenizer_class.COMMANDS):
        return self._parse_command()

    # No registered statement parser: parse as a bare expression or SELECT,
    # then apply any trailing query modifiers.
    expression = self._parse_expression()
    expression = self._parse_set_operations(expression) if expression else self._parse_select()
    return self._parse_query_modifiers(expression)

def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
    start = self._prev
    temporary = self._match(TokenType.TEMPORARY)
    materialized = self._match_text_seq("MATERIALIZED")

    kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
    if not kind:
        # Not a recognized creatable kind - fall back to a generic command.
        return self._parse_as_command(start)

    concurrently = self._match_text_seq("CONCURRENTLY")
        if_exists = exists or self._parse_exists()

        if kind == "COLUMN":
            this = self._parse_column()
        else:
            this = self._parse_table_parts(
                schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA
            )

        # ClickHouse-style ON CLUSTER clause
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_csv(self._parse_types)
        else:
            expressions = None

        # NOTE: the trailing _match_text_seq calls below consume tokens in order,
        # so the keyword arguments are evaluated left-to-right intentionally
        return self.expression(
            exp.Drop,
            exists=if_exists,
            this=this,
            expressions=expressions,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
            temporary=temporary,
            materialized=materialized,
            cascade=self._match_text_seq("CASCADE"),
            constraints=self._match_text_seq("CONSTRAINTS"),
            purge=self._match_text_seq("PURGE"),
            cluster=cluster,
            concurrently=concurrently,
        )

    def _parse_exists(self, not_: bool = False) -> t.Optional[bool]:
        """Match IF [NOT] EXISTS; returns a truthy value only if fully matched."""
        return (
            self._match_text_seq("IF")
            and (not not_ or self._match(TokenType.NOT))
            and self._match(TokenType.EXISTS)
        )

    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse a CREATE [OR REPLACE | OR REFRESH] <kind> statement."""
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # T-SQL [NON]CLUSTERED COLUMNSTORE index flavor
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # CREATE TABLE FUNCTION ...: skip TABLE so FUNCTION becomes the creatable
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: t.Optional[exp.Expression] = None
        indexes = None
        no_schema_binding = None
        begin = None
        end = None
        clone = None

        def extend_props(temp_props: t.Optional[exp.Properties]) -> None:
            # Accumulate properties parsed at different grammar locations into one node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token.token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Merge all SequenceProperties fragments into a single node,
                    # concatenating their "options" lists
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fallback to this as an alternative.
            if not expression and has_alias:
                expression = self._try_parse(self._parse_table_parts)

            if create_token.token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token.token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            # Snowflake-style CLONE/COPY of an existing table
            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy
                )

        # Leftover tokens (other than closing paren/comma) mean unsupported syntax
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create,
            this=this,
            kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
            replace=replace,
            refresh=refresh,
            unique=unique,
            expression=expression,
            exists=exists,
            properties=properties,
            indexes=indexes,
            no_schema_binding=no_schema_binding,
            begin=begin,
            end=end,
            clone=clone,
            concurrently=concurrently,
            clustered=clustered,
        )

    def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]:
        """Parse CREATE SEQUENCE options; returns None if no tokens were consumed."""
        seq = exp.SequenceProperties()

        options = []
        index = self._index

        while self._curr:
            self._match(TokenType.COMMA)
            if self._match_text_seq("INCREMENT"):
                self._match_text_seq("BY")
                self._match_text_seq("=")
                seq.set("increment", self._parse_term())
            elif self._match_text_seq("MINVALUE"):
                seq.set("minvalue", self._parse_term())
            elif self._match_text_seq("MAXVALUE"):
                seq.set("maxvalue", self._parse_term())
            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
                self._match_text_seq("=")
                seq.set("start", self._parse_term())
            elif self._match_text_seq("CACHE"):
                # T-SQL allows empty CACHE which is initialized dynamically
                seq.set("cache", self._parse_number() or True)
            elif self._match_text_seq("OWNED", "BY"):
                # "OWNED BY NONE" is the default
                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
            else:
                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
                if opt:
                    options.append(opt)
                else:
                    break

        seq.set("options", options if options else None)
        # No progress means there were no sequence properties at all
        return None if self._index == index else seq

    def _parse_property_before(self) -> t.Optional[exp.Expression]:
        # only used for teradata currently
        self._match(TokenType.COMMA)

        # Optional modifier keywords preceding the property name; each _match_*
        # call consumes its token only when present
        kwargs = {
            "no": self._match_text_seq("NO"),
            "dual": self._match_text_seq("DUAL"),
            "before": self._match_text_seq("BEFORE"),
            "default": self._match_text_seq("DEFAULT"),
            "local": (self._match_text_seq("LOCAL") and "LOCAL")
            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
            "after": self._match_text_seq("AFTER"),
            "minimum": self._match_texts(("MIN", "MINIMUM")),
            "maximum": self._match_texts(("MAX", "MAXIMUM")),
        }

        if self._match_texts(self.PROPERTY_PARSERS):
            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
            try:
                # Only forward the modifiers that actually matched
                return parser(self, **{k: v for k, v in kwargs.items() if v})
            except TypeError:
                # The parser's signature doesn't accept these modifiers
                self.raise_error(f"Cannot parse property '{self._prev.text}'")

        return None

    def _parse_wrapped_properties(self) -> t.List[exp.Expression]:
        # Parenthesized, comma-separated property list
        return self._parse_wrapped_csv(self._parse_property)

    def _parse_property(self) -> t.Optional[exp.Expression]:
        """Parse a single table/DDL property, trying registered parsers first,
        then sequence properties, then a generic key = value assignment."""
        if self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

        if self._match_text_seq("COMPOUND", "SORTKEY"):
            return self._parse_sortkey(compound=True)

        if self._match_text_seq("SQL", "SECURITY"):
            return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER"))

        index = self._index

        seq_props = self._parse_sequence_properties()
        if seq_props:
            return seq_props

        self._retreat(index)
        key = self._parse_column()

        if not self._match(TokenType.EQ):
            self._retreat(index)
            return None

        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
        if isinstance(key, exp.Column):
            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

        value = self._parse_bitwise() or self._parse_var(any_token=True)

        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
        if isinstance(value, exp.Column):
            value = exp.var(value.name)

        return self.expression(exp.Property, this=key, value=value)

    def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]:
        """Parse Hive STORED BY <handler> / STORED AS <format> clauses."""
        if self._match_text_seq("BY"):
            return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string())

        self._match(TokenType.ALIAS)
        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None

        return self.expression(
            exp.FileFormatProperty,
            this=(
                self.expression(
                    exp.InputOutputFormat,
                    input_format=input_format,
                    output_format=output_format,
                )
                if input_format or output_format
                else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
            ),
            hive_format=True,
        )

    def _parse_unquoted_field(self) -> t.Optional[exp.Expression]:
        # Unquoted identifiers in property values are treated as plain variables
        field = self._parse_field()
        if isinstance(field, exp.Identifier) and not field.quoted:
            field = exp.var(field)

        return field

    def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E:
        # Generic `<prop> [=|AS] <value>` property
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs)

    def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]:
        """Greedily parse consecutive properties; `before` selects the Teradata
        pre-name property grammar. Returns None when nothing matched."""
        properties = []
        while True:
            if before:
                prop = self._parse_property_before()
            else:
                prop = self._parse_property()
            if not prop:
                break
            # A single parse may yield one property or a list of them
            for p in ensure_list(prop):
                properties.append(p)

        if properties:
            return self.expression(exp.Properties, expressions=properties)

        return None

    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
        return self.expression(
            exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION")
        )

    def _parse_security(self) -> t.Optional[exp.SecurityProperty]:
        if self._match_texts(("NONE", "DEFINER", "INVOKER")):
            security_specifier = self._prev.text.upper()
            return self.expression(exp.SecurityProperty, this=security_specifier)
        return None

    def _parse_settings_property(self) -> exp.SettingsProperty:
        return self.expression(
            exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment)
        )

    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
        """Disambiguate VOLATILE: a table property when preceded by a creatable
        token, otherwise a function stability marker."""
        if self._index >= 2:
            pre_volatile_token = self._tokens[self._index - 2]
        else:
            pre_volatile_token = None

        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
            return exp.VolatileProperty()

        return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE"))

    def _parse_retention_period(self) -> exp.Var:
        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
        number = self._parse_number()
        number_str = f"{number} " if number else ""
        unit = self._parse_var(any_token=True)
        return exp.var(f"{number_str}{unit}")

    def _parse_system_versioning_property(
        self, with_: bool = False
    ) -> exp.WithSystemVersioningProperty:
        """Parse T-SQL SYSTEM_VERSIONING = ON (...) / OFF."""
        self._match(TokenType.EQ)
        prop = self.expression(
            exp.WithSystemVersioningProperty,
            **{  # type: ignore
                "on": True,
                "with": with_,
            },
        )

        if self._match_text_seq("OFF"):
            prop.set("on", False)
            return prop

        self._match(TokenType.ON)
        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("HISTORY_TABLE", "="):
                    prop.set("this", self._parse_table_parts())
                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
        """Parse DATA_DELETION = ON|OFF ( FILTER_COLUMN = ..., RETENTION_PERIOD = ... )."""
        self._match(TokenType.EQ)
        # ON unless an explicit OFF follows
        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
        prop = self.expression(exp.DataDeletionProperty, on=on)

        if self._match(TokenType.L_PAREN):
            while self._curr and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILTER_COLUMN", "="):
                    prop.set("filter_column", self._parse_column())
                elif self._match_text_seq("RETENTION_PERIOD", "="):
                    prop.set("retention_period", self._parse_retention_period())

                self._match(TokenType.COMMA)

        return prop

    def _parse_distributed_property(self) -> exp.DistributedByProperty:
        """Parse DISTRIBUTED BY HASH(...)/RANDOM [BUCKETS n|AUTO] [ORDER BY ...]."""
        kind = "HASH"
        expressions: t.Optional[t.List[exp.Expression]] = None
        if self._match_text_seq("BY", "HASH"):
            expressions = self._parse_wrapped_csv(self._parse_id_var)
        elif self._match_text_seq("BY", "RANDOM"):
            kind = "RANDOM"

        # If the BUCKETS keyword is not present, the number of buckets is AUTO
        buckets: t.Optional[exp.Expression] = None
        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
            buckets = self._parse_number()

        return self.expression(
            exp.DistributedByProperty,
            expressions=expressions,
            kind=kind,
            buckets=buckets,
            order=self._parse_order(),
        )

    def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E:
        # e.g. DUPLICATE KEY (a, b) — the concrete node type is supplied by the caller
        self._match_text_seq("KEY")
        expressions = self._parse_wrapped_id_vars()
        return self.expression(expr_type, expressions=expressions)

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        """Parse the clause following WITH in DDL; dispatches on the next token(s)."""
        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
            prop = self._parse_system_versioning_property(with_=True)
            self._match_r_paren()
            return prop

        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_properties()

        if self._match_text_seq("JOURNAL"):
            return self._parse_withjournaltable()

        if self._match_texts(self.VIEW_ATTRIBUTES):
            return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper())

        if self._match_text_seq("DATA"):
            return self._parse_withdata(no=False)
        elif self._match_text_seq("NO", "DATA"):
            return self._parse_withdata(no=True)

        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
            return self._parse_serde_properties(with_=True)

        if self._match(TokenType.SCHEMA):
            return self.expression(
                exp.WithSchemaBindingProperty,
                this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS),
            )

        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
            return self.expression(
                exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option)
            )

        if not self._next:
            return None

        return self._parse_withisolatedloading()

    def _parse_procedure_option(self) -> exp.Expression | None:
        if self._match_text_seq("EXECUTE", "AS"):
            return self.expression(
                exp.ExecuteAsProperty,
                this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
                or self._parse_string(),
            )

        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
    def _parse_definer(self) -> t.Optional[exp.DefinerProperty]:
        self._match(TokenType.EQ)

        user = self._parse_id_var()
        self._match(TokenType.PARAMETER)
        # Host may be an identifier or the `%` wildcard (MOD token)
        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)

        if not user or not host:
            return None

        return exp.DefinerProperty(this=f"{user}@{host}")

    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
        self._match(TokenType.TABLE)
        self._match(TokenType.EQ)
        return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts())

    def _parse_log(self, no: bool = False) -> exp.LogProperty:
        return self.expression(exp.LogProperty, no=no)

    def _parse_journal(self, **kwargs: t.Any) -> exp.JournalProperty:
        return self.expression(exp.JournalProperty, **kwargs)

    def _parse_checksum(self) -> exp.ChecksumProperty:
        self._match(TokenType.EQ)

        # CHECKSUM = ON | OFF | DEFAULT; None means no explicit ON/OFF was given
        on = None
        if self._match(TokenType.ON):
            on = True
        elif self._match_text_seq("OFF"):
            on = False

        return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT))

    def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster:
        # CLUSTER BY, optionally with a parenthesized expression list
        return self.expression(
            exp.Cluster,
            expressions=(
                self._parse_wrapped_csv(self._parse_ordered)
                if wrapped
                else self._parse_csv(self._parse_ordered)
            ),
        )

    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
        """Parse CLUSTERED BY (cols) [SORTED BY (...)] INTO n BUCKETS."""
        self._match_text_seq("BY")

        self._match_l_paren()
        expressions = self._parse_csv(self._parse_column)
        self._match_r_paren()

        if self._match_text_seq("SORTED", "BY"):
            self._match_l_paren()
            sorted_by = self._parse_csv(self._parse_ordered)
            self._match_r_paren()
        else:
            sorted_by = None

        self._match(TokenType.INTO)
        buckets = self._parse_number()
        self._match_text_seq("BUCKETS")

        return self.expression(
            exp.ClusteredByProperty,
            expressions=expressions,
            sorted_by=sorted_by,
            buckets=buckets,
        )

    def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]:
        if not self._match_text_seq("GRANTS"):
            # COPY wasn't followed by GRANTS: give the token back
            self._retreat(self._index - 1)
            return None

        return self.expression(exp.CopyGrantsProperty)

    def _parse_freespace(self) -> exp.FreespaceProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT)
        )

    def _parse_mergeblockratio(
        self, no: bool = False, default: bool = False
    ) -> exp.MergeBlockRatioProperty:
        if self._match(TokenType.EQ):
            return self.expression(
                exp.MergeBlockRatioProperty,
                this=self._parse_number(),
                percent=self._match(TokenType.PERCENT),
            )

        return self.expression(exp.MergeBlockRatioProperty, no=no, default=default)

    def _parse_datablocksize(
        self,
        default: t.Optional[bool] = None,
        minimum: t.Optional[bool] = None,
        maximum: t.Optional[bool] = None,
    ) -> exp.DataBlocksizeProperty:
        self._match(TokenType.EQ)
        size = self._parse_number()

        units = None
        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
            units = self._prev.text

        return self.expression(
            exp.DataBlocksizeProperty,
            size=size,
            units=units,
            default=default,
            minimum=minimum,
            maximum=maximum,
        )

    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
        self._match(TokenType.EQ)
        always = self._match_text_seq("ALWAYS")
        manual = self._match_text_seq("MANUAL")
        never = self._match_text_seq("NEVER")
        default = self._match_text_seq("DEFAULT")

        autotemp = None
        if self._match_text_seq("AUTOTEMP"):
            autotemp = self._parse_schema()

        return self.expression(
            exp.BlockCompressionProperty,
            always=always,
            manual=manual,
            never=never,
            default=default,
            autotemp=autotemp,
        )

    def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]:
        """Parse Teradata's [NO] [CONCURRENT] ISOLATED LOADING; backtracks fully
        when the mandatory keywords are missing."""
        index = self._index
        no = self._match_text_seq("NO")
        concurrent = self._match_text_seq("CONCURRENT")

        if not self._match_text_seq("ISOLATED", "LOADING"):
            self._retreat(index)
            return None

        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
        return self.expression(
            exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target
        )

    def _parse_locking(self) -> exp.LockingProperty:
        """Parse Teradata LOCKING <kind> [<name>] FOR|IN <lock type> [OVERRIDE]."""
        if self._match(TokenType.TABLE):
            kind = "TABLE"
        elif self._match(TokenType.VIEW):
            kind = "VIEW"
        elif self._match(TokenType.ROW):
            kind = "ROW"
        elif self._match_text_seq("DATABASE"):
            kind = "DATABASE"
        else:
            kind = None

        # Only named objects carry a target; ROW locking does not
        if kind in ("DATABASE", "TABLE", "VIEW"):
            this = self._parse_table_parts()
        else:
            this = None

        if self._match(TokenType.FOR):
            for_or_in = "FOR"
        elif self._match(TokenType.IN):
            for_or_in = "IN"
        else:
            for_or_in = None

        if self._match_text_seq("ACCESS"):
            lock_type = "ACCESS"
        elif self._match_texts(("EXCL", "EXCLUSIVE")):
            lock_type = "EXCLUSIVE"
        elif self._match_text_seq("SHARE"):
            lock_type = "SHARE"
        elif self._match_text_seq("READ"):
            lock_type = "READ"
        elif self._match_text_seq("WRITE"):
            lock_type = "WRITE"
        elif self._match_text_seq("CHECKSUM"):
            lock_type = "CHECKSUM"
        else:
            lock_type = None

        override = self._match_text_seq("OVERRIDE")

        return self.expression(
            exp.LockingProperty,
            this=this,
            kind=kind,
            for_or_in=for_or_in,
            lock_type=lock_type,
            override=override,
        )

    def _parse_partition_by(self) -> t.List[exp.Expression]:
        # Empty list (not None) when there is no PARTITION BY clause
        if self._match(TokenType.PARTITION_BY):
            return self._parse_csv(self._parse_assignment)
        return []

    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
        """Parse a Postgres partition bound: IN (...), FROM (...) TO (...),
        or WITH (MODULUS n, REMAINDER m)."""

        def _parse_partition_bound_expr() -> t.Optional[exp.Expression]:
            if self._match_text_seq("MINVALUE"):
                return exp.var("MINVALUE")
            if self._match_text_seq("MAXVALUE"):
                return exp.var("MAXVALUE")
            return self._parse_bitwise()

        this: t.Optional[exp.Expression | t.List[exp.Expression]] = None
        expression = None
        from_expressions = None
        to_expressions = None

        if self._match(TokenType.IN):
            this = self._parse_wrapped_csv(self._parse_bitwise)
        elif self._match(TokenType.FROM):
            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
            self._match_text_seq("TO")
            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
        elif self._match_text_seq("WITH", "(", "MODULUS"):
            this = self._parse_number()
            self._match_text_seq(",", "REMAINDER")
            expression = self._parse_number()
            self._match_r_paren()
        else:
            self.raise_error("Failed to parse partition bound spec.")

        return self.expression(
            exp.PartitionBoundSpec,
            this=this,
            expression=expression,
            from_expressions=from_expressions,
            to_expressions=to_expressions,
        )

    # https://www.postgresql.org/docs/current/sql-createtable.html
    def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]:
        """Parse PARTITION OF <parent> { DEFAULT | FOR VALUES <bound spec> }."""
        if not self._match_text_seq("OF"):
            # Undo the PARTITION match so the caller can try other grammar
            self._retreat(self._index - 1)
            return None

        this = self._parse_table(schema=True)

        if self._match(TokenType.DEFAULT):
            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
        elif self._match_text_seq("FOR", "VALUES"):
            expression = self._parse_partition_bound_spec()
        else:
            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

        return self.expression(exp.PartitionedOfProperty, this=this, expression=expression)

    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.PartitionedByProperty,
            this=self._parse_schema() or self._parse_bracket(self._parse_field()),
        )

    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
        # WITH [NO] DATA [AND [NO] STATISTICS]
        if self._match_text_seq("AND", "STATISTICS"):
            statistics = True
        elif self._match_text_seq("AND", "NO", "STATISTICS"):
            statistics = False
        else:
            statistics = None

        return self.expression(exp.WithDataProperty, no=no, statistics=statistics)

    def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL")
        return None

    def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA")
        return None

    def _parse_no_property(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("PRIMARY", "INDEX"):
            return exp.NoPrimaryIndexProperty()
        if self._match_text_seq("SQL"):
            return self.expression(exp.SqlReadWriteProperty, this="NO SQL")
        return None

    def _parse_on_property(self) -> t.Optional[exp.Expression]:
        # ON COMMIT ... ROWS, or a generic ON <schema> (e.g. ClickHouse ON CLUSTER targets)
        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
            return exp.OnCommitProperty()
        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
            return exp.OnCommitProperty(delete=True)
        return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var()))

    def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]:
        if self._match_text_seq("SQL", "DATA"):
            return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA")
        return None

    def _parse_distkey(self) -> exp.DistKeyProperty:
        return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var))

    def _parse_create_like(self) -> t.Optional[exp.LikeProperty]:
        """Parse LIKE <table> [INCLUDING|EXCLUDING <option>]*."""
        table = self._parse_table(schema=True)

        options = []
        while self._match_texts(("INCLUDING", "EXCLUDING")):
            this = self._prev.text.upper()

            id_var = self._parse_id_var()
            if not id_var:
                return None

            options.append(
                self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper()))
            )

        return self.expression(exp.LikeProperty, this=table, expressions=options)

    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
        return self.expression(
            exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound
        )

    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
        self._match(TokenType.EQ)
        return self.expression(
            exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default
        )

    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
        self._match_text_seq("WITH", "CONNECTION")
        return self.expression(
            exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts()
        )

    def _parse_returns(self) -> exp.ReturnsProperty:
        """Parse a UDF RETURNS clause: TABLE<...>, TABLE(...), NULL ON NULL INPUT,
        or a plain type."""
        value: t.Optional[exp.Expression]
        null = None
        is_table = self._match(TokenType.TABLE)

        if is_table:
            if self._match(TokenType.LT):
                value = self.expression(
                    exp.Schema,
                    this="TABLE",
                    expressions=self._parse_csv(self._parse_struct_types),
                )
                if not self._match(TokenType.GT):
                    self.raise_error("Expecting >")
            else:
                value = self._parse_schema(exp.var("TABLE"))
        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
            value = None
        else:
            value = self._parse_types()

        return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null)

    def _parse_describe(self) -> exp.Describe:
        """Parse DESCRIBE [<kind>] [<style>] <statement or table> [...]."""
        kind = self._match_set(self.CREATABLES) and self._prev.text
        style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper()
        if self._match(TokenType.DOT):
            # The "style" was actually the first part of a dotted table name
            style = None
            self._retreat(self._index - 2)

        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None

        if self._match_set(self.STATEMENT_PARSERS, advance=False):
            this = self._parse_statement()
        else:
            this = self._parse_table(schema=True)

        properties = self._parse_properties()
        expressions = properties.expressions if properties else None
        partition = self._parse_partition()
        return self.expression(
            exp.Describe,
            this=this,
            style=style,
            kind=kind,
            expressions=expressions,
            partition=partition,
            format=format,
        )

    def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts:
        """Parse a multi-table INSERT (INSERT ALL / INSERT FIRST) body: a sequence of
        [WHEN <cond> THEN] [ELSE] INTO <table> [VALUES ...] branches plus a source."""
        kind = self._prev.text.upper()
        expressions = []

        def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]:
            # One WHEN ... THEN INTO ... / ELSE INTO ... / bare INTO ... branch;
            # returns None once no INTO follows, which terminates the loop below.
            if self._match(TokenType.WHEN):
                expression = self._parse_disjunction()
                self._match(TokenType.THEN)
            else:
                expression = None

            else_ = self._match(TokenType.ELSE)

            if not self._match(TokenType.INTO):
                return None

            return self.expression(
                exp.ConditionalInsert,
                this=self.expression(
                    exp.Insert,
                    this=self._parse_table(schema=True),
                    expression=self._parse_derived_table_values(),
                ),
                expression=expression,
                else_=else_,
            )

        expression = parse_conditional_insert()
        while expression is not None:
            expressions.append(expression)
            expression = parse_conditional_insert()

        return self.expression(
            exp.MultitableInserts,
            kind=kind,
            comments=comments,
            expressions=expressions,
            source=self._parse_table(),
        )

    def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]:
        """Parse an INSERT statement; dispatches to _parse_multitable_inserts when
        FIRST/ALL follows, and supports INSERT ... DIRECTORY as a target."""
        comments = []
        hint = self._parse_hint()
        overwrite = self._match(TokenType.OVERWRITE)
        ignore = self._match(TokenType.IGNORE)
        local = self._match_text_seq("LOCAL")
        alternative = None
        is_function = None

        if self._match_text_seq("DIRECTORY"):
            this: t.Optional[exp.Expression] = self.expression(
                exp.Directory,
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        else:
            if self._match_set((TokenType.FIRST, TokenType.ALL)):
                comments += ensure_list(self._prev_comments)
                return self._parse_multitable_inserts(comments)

            if self._match(TokenType.OR):
                # e.g. INSERT OR REPLACE / OR IGNORE, limited to INSERT_ALTERNATIVES
                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

            self._match(TokenType.INTO)
            comments += ensure_list(self._prev_comments)
            self._match(TokenType.TABLE)
            is_function = self._match(TokenType.FUNCTION)

            this = (
                self._parse_table(schema=True, parse_partition=True)
                if not is_function
                else self._parse_function()
            )
            if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
                this.set("alias", self._parse_table_alias())

        returning = self._parse_returning()

        # NOTE: the keyword-argument order below is significant — each value consumes
        # tokens in turn (STORED, BY NAME, IF EXISTS, REPLACE WHERE, PARTITION, ...)
        return self.expression(
            exp.Insert,
            comments=comments,
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        )

    def _parse_kill(self) -> exp.Kill:
        """Parse KILL [CONNECTION | QUERY] <id>."""
        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None

        return self.expression(
            exp.Kill,
            this=self._parse_primary(),
            kind=kind,
        )

    def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]:
        """Parse ON CONFLICT ... or ON DUPLICATE KEY ... after an INSERT."""
        conflict = self._match_text_seq("ON", "CONFLICT")
        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")

        if not conflict and not duplicate:
            return None

        conflict_keys = None
        constraint = None

        if conflict:
            if self._match_text_seq("ON", "CONSTRAINT"):
                constraint = self._parse_id_var()
            elif self._match(TokenType.L_PAREN):
                conflict_keys = self._parse_csv(self._parse_id_var)
                self._match_r_paren()

        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
        if self._prev.token_type == TokenType.UPDATE:
            # DO UPDATE SET a = ..., b = ...
            self._match(TokenType.SET)
            expressions = self._parse_csv(self._parse_equality)
        else:
            expressions = None

        return self.expression(
            exp.OnConflict,
            duplicate=duplicate,
            expressions=expressions,
            action=action,
            conflict_keys=conflict_keys,
            constraint=constraint,
            where=self._parse_where(),
        )

    def _parse_returning(self) -> t.Optional[exp.Returning]:
        """Parse RETURNING <exprs> [INTO <target>]; None if RETURNING is absent."""
        if not self._match(TokenType.RETURNING):
            return None
        return self.expression(
            exp.Returning,
            expressions=self._parse_csv(self._parse_expression),
            into=self._match(TokenType.INTO) and self._parse_table_part(),
        )

    def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse the FORMAT part of ROW FORMAT (the ROW token was already consumed)."""
        if not self._match(TokenType.FORMAT):
            return None
        return self._parse_row_format()

    def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]:
        """Parse [WITH] SERDEPROPERTIES (...); retreats fully if the keyword is absent."""
        index = self._index
        with_ = with_ or self._match_text_seq("WITH")

        if not self._match(TokenType.SERDE_PROPERTIES):
            self._retreat(index)
            return None
        return self.expression(
            exp.SerdeProperties,
            **{  # type: ignore
                "expressions": self._parse_wrapped_properties(),
                "with": with_,
            },
        )

    def _parse_row_format(
        self, match_row: bool = False
    ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]:
        """Parse ROW FORMAT SERDE '<class>' [WITH SERDEPROPERTIES ...] or
        ROW FORMAT DELIMITED with its optional terminator clauses (Hive-style)."""
        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
            return None

        if self._match_text_seq("SERDE"):
            this = self._parse_string()

            serde_properties = self._parse_serde_properties()

            return self.expression(
                exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties
            )

        self._match_text_seq("DELIMITED")

        kwargs = {}

        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
            kwargs["fields"] = self._parse_string()
            if self._match_text_seq("ESCAPED", "BY"):
                kwargs["escaped"] = self._parse_string()
        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
            kwargs["collection_items"] = self._parse_string()
        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
            kwargs["map_keys"] = self._parse_string()
        if self._match_text_seq("LINES", "TERMINATED", "BY"):
            kwargs["lines"] = self._parse_string()
        if self._match_text_seq("NULL", "DEFINED", "AS"):
            kwargs["null"] = self._parse_string()

        return self.expression(exp.RowFormatDelimitedProperty, **kwargs)  # type: ignore

    def _parse_load(self) -> exp.LoadData | exp.Command:
        """Parse LOAD DATA [LOCAL] INPATH ... INTO TABLE ... (Hive); anything else
        after LOAD falls back to an opaque Command."""
        if self._match_text_seq("DATA"):
            local = self._match_text_seq("LOCAL")
            self._match_text_seq("INPATH")
            inpath = self._parse_string()
            overwrite = self._match(TokenType.OVERWRITE)
            self._match_pair(TokenType.INTO, TokenType.TABLE)

            return self.expression(
                exp.LoadData,
                this=self._parse_table(schema=True),
                local=local,
                overwrite=overwrite,
                inpath=inpath,
                partition=self._parse_partition(),
                input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
                serde=self._match_text_seq("SERDE") and self._parse_string(),
            )
        return self._parse_as_command(self._prev)

    def _parse_delete(self) -> exp.Delete:
        # This handles MySQL's "Multiple-Table Syntax"
        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
        tables = None
        if not self._match(TokenType.FROM, advance=False):
            tables = self._parse_csv(self._parse_table) or None

        returning = self._parse_returning()

        return self.expression(
            exp.Delete,
            tables=tables,
            this=self._match(TokenType.FROM) and self._parse_table(joins=True),
            using=self._match(TokenType.USING) and self._parse_table(joins=True),
            cluster=self._match(TokenType.ON) and self._parse_on_property(),
            where=self._parse_where(),
            returning=returning or self._parse_returning(),
            limit=self._parse_limit(),
        )

    def _parse_update(self) -> exp.Update:
        """Parse UPDATE <table> SET <assignments> [FROM ...] [WHERE ...] ...

        RETURNING is attempted both before and after the WHERE clause to cover
        dialects that place it differently.
        """
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        """Parse USE [<kind>] <target> (kind limited to self.USABLES)."""
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        """Parse UNCACHE TABLE [IF EXISTS] <table>; raises if TABLE is missing."""
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        """Parse CACHE [LAZY] TABLE <table> [OPTIONS('k' = 'v')] [AS <select>]."""
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            # Only a single 'key' = 'value' pair is consumed here
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
exp.Cache, 3164 this=table, 3165 lazy=lazy, 3166 options=options, 3167 expression=self._parse_select(nested=True), 3168 ) 3169 3170 def _parse_partition(self) -> t.Optional[exp.Partition]: 3171 if not self._match_texts(self.PARTITION_KEYWORDS): 3172 return None 3173 3174 return self.expression( 3175 exp.Partition, 3176 subpartition=self._prev.text.upper() == "SUBPARTITION", 3177 expressions=self._parse_wrapped_csv(self._parse_assignment), 3178 ) 3179 3180 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3181 def _parse_value_expression() -> t.Optional[exp.Expression]: 3182 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3183 return exp.var(self._prev.text.upper()) 3184 return self._parse_expression() 3185 3186 if self._match(TokenType.L_PAREN): 3187 expressions = self._parse_csv(_parse_value_expression) 3188 self._match_r_paren() 3189 return self.expression(exp.Tuple, expressions=expressions) 3190 3191 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        """Parse the SELECT projection list (overridable hook for dialects)."""
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        """Parse the interior of a parenthesized query: pivot/unpivot, FROM-first
        syntax, or a nested select/table, then apply set ops and modifiers."""
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

            # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
            # in case a modifier (e.g. join) is following
            if table and isinstance(this, exp.Values) and this.alias:
                alias = this.args["alias"].pop()
                this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Parse a SELECT-like query, optionally consuming trailing |> pipe syntax
        and wrapping the result in a subquery when a table is expected."""
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        """Core SELECT parser: handles leading CTEs, leading FROM (duckdb), the
        SELECT clause itself, parenthesized/VALUES/SUMMARIZE/DESCRIBE/STREAM
        alternatives, and finally set operations."""
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                # A following dot means ALL/DISTINCT would be part of a qualified name
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                # STREAM wasn't a function call, so treat it as a regular identifier
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        """Parse SEARCH (DEPTH|BREADTH) FIRST BY ... SET ... [USING ...] after a
        recursive WITH; None if no recognized search kind follows."""
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        """Parse a WITH clause and its comma-separated CTEs."""
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                # Tolerate a redundant WITH after the comma
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        """Parse one CTE: alias [AS] [NOT MATERIALIZED | MATERIALIZED] (<statement>).

        Bare VALUES bodies are rewrapped into SELECT * FROM ... so downstream code
        always sees a query.
        """
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        """Wrap a parsed query in exp.Subquery, consuming pivots, an optional
        alias and a table sample."""
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        """Rewrite comma-joined references to previously-seen table aliases into
        explicit UNNEST(...) calls (dialects with implicit unnest semantics)."""
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        # Aliases/names seen so far; a later ON-less join against one of these is an
        # implicit unnest of a column from that relation
        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Attach trailing query modifiers (joins, laterals, WHERE/GROUP/ORDER/
        LIMIT/... via QUERY_MODIFIER_PARSERS) to a modifiable expression."""
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            # A LIMIT may carry an embedded offset (and LIMIT ... BY
                            # expressions); hoist them onto a separate Offset node
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        """Consume all remaining tokens and return them as a single opaque hint."""
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        """Hook for dialects to customize how function calls inside hints parse."""
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        """Parse a hint comment body as CSV of function calls / vars, falling back
        to an opaque string on parse failure or leftover tokens."""
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        """Parse an optimizer hint carried in a comment attached to a HINT token."""
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None

    def _parse_into(self) -> t.Optional[exp.Into]:
        """Parse SELECT ... INTO [TEMPORARY | UNLOGGED] [TABLE] <table>."""
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        """Parse a FROM clause; None if FROM is absent (unless skip_from_token)."""
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        """Parse one MEASURES item: [FINAL | RUNNING] <expression>."""
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        """Parse a MATCH_RECOGNIZE(...) clause: PARTITION BY, ORDER BY, MEASURES,
        rows-per-match, AFTER MATCH SKIP, PATTERN and DEFINE sub-clauses."""
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            # The PATTERN body is captured verbatim by tracking paren depth rather
            # than being parsed as expressions
            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        """Parse LATERAL / CROSS APPLY / OUTER APPLY and its alias handling.

        cross_apply is True for CROSS APPLY, False for OUTER APPLY, and None for
        plain LATERAL.
        """
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            # Not a subquery: an UNNEST, a (possibly dotted) function call, or a name
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        """Return the (method, side, kind) tokens of a join prefix, each optional."""
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        """Parse the identifier list of a USING clause; bare columns are unwrapped
        to their underlying identifiers."""

        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        """Parse one join: comma cross-join, [method] [side] [kind] JOIN, or
        CROSS/OUTER APPLY, along with its ON/USING condition and pivots."""
        if self._match(TokenType.COMMA):
            # Comma join; _try_parse leaves the stream untouched if no table follows
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # The condition may follow a chain of nested joins; try to parse those
            # first and attach ON/USING afterwards, retreating if neither appears
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        """Parse an expression optionally followed by a PostgreSQL operator class."""
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        """Parse the trailing parameters of CREATE INDEX: USING, column list,
        INCLUDE, PARTITION BY, WITH storage options, tablespace, WHERE and ON."""
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        """Parse an index definition; `index`/`anonymous` indicate the name was already consumed or is absent."""
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse T-SQL WITH (...) table hints or MySQL USE/FORCE/IGNORE INDEX hints."""
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)
        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        """Parse a single dotted component of a table name (function, identifier, string, or placeholder)."""
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        """Parse a (possibly qualified) table name into an exp.Table with catalog/db/this parts."""
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse any table-like FROM source: lateral, unnest, VALUES, subquery, or a named table
        plus its trailing clauses (version, sample, alias, hints, pivots, joins)."""
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        """Parse temporal-table clauses: FOR TIMESTAMP/VERSION AS OF | FROM..TO | BETWEEN..AND | CONTAINED IN | ALL."""
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        """Parse Snowflake AT/BEFORE (<kind> => <expr>) clauses; rewinds if the shape doesn't match."""
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        """Parse Snowflake CHANGES(INFORMATION => ...) with optional AT/BEFORE and END clauses."""
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        """Parse UNNEST(expr, ...) with optional alias, column aliases and WITH ORDINALITY/OFFSET."""
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            # With ordinality, the last column alias names the ordinality column, not a value column
            if offset and len(expressions) < len(columns):
                offset = columns.pop()
4242 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4243 self._match(TokenType.ALIAS) 4244 offset = self._parse_id_var( 4245 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4246 ) or exp.to_identifier("offset") 4247 4248 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4249 4250 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4251 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4252 if not is_derived and not ( 4253 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4254 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4255 ): 4256 return None 4257 4258 expressions = self._parse_csv(self._parse_value) 4259 alias = self._parse_table_alias() 4260 4261 if is_derived: 4262 self._match_r_paren() 4263 4264 return self.expression( 4265 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4266 ) 4267 4268 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4269 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4270 as_modifier and self._match_text_seq("USING", "SAMPLE") 4271 ): 4272 return None 4273 4274 bucket_numerator = None 4275 bucket_denominator = None 4276 bucket_field = None 4277 percent = None 4278 size = None 4279 seed = None 4280 4281 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4282 matched_l_paren = self._match(TokenType.L_PAREN) 4283 4284 if self.TABLESAMPLE_CSV: 4285 num = None 4286 expressions = self._parse_csv(self._parse_primary) 4287 else: 4288 expressions = None 4289 num = ( 4290 self._parse_factor() 4291 if self._match(TokenType.NUMBER, advance=False) 4292 else self._parse_primary() or self._parse_placeholder() 4293 ) 4294 4295 if self._match_text_seq("BUCKET"): 4296 bucket_numerator = self._parse_number() 4297 self._match_text_seq("OUT", "OF") 4298 bucket_denominator = bucket_denominator = self._parse_number() 4299 
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        """Parse consecutive PIVOT/UNPIVOT clauses until one fails to parse."""
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        """Lazily yield joins until `_parse_join` returns None."""
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        """Parse an UNPIVOT `INTO NAME <col> VALUE <cols>` clause."""
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        """Parse DuckDB's simplified PIVOT/UNPIVOT statement syntax."""
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        """Parse the `<col> IN (<values>)` part of a PIVOT's FOR clause, with optional value aliases."""
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        """Parse one (possibly aliased) aggregation function inside a PIVOT; errors if absent."""
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        """Parse a full PIVOT/UNPIVOT (...) clause, including generated output column names."""
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        """Return the aliases of the given pivot aggregations (unaliased ones are skipped)."""
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        """Parse a ClickHouse PREWHERE clause."""
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        """Parse a WHERE clause."""
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        """Parse GROUP BY with ALL/DISTINCT, expressions, CUBE, ROLLUP, GROUPING SETS and WITH TOTALS."""
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # Nothing (or only a lone WITH) was consumed after the expressions: rewind and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        """Parse CUBE/ROLLUP; the `WITH CUBE|ROLLUP` form carries no column list."""
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        """Parse one grouping set: either a parenthesized column tuple or a single column."""
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        """Parse a HAVING clause."""
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        """Parse a QUALIFY clause."""
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        """Parse a CONNECT BY condition, temporarily registering PRIOR as a no-paren function."""
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        """Parse Oracle-style START WITH ... CONNECT BY (in either order), with optional NOCYCLE."""
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        """Parse `<name> [AS <expr>]`, as used e.g. in INTERPOLATE lists."""
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a ClickHouse INTERPOLATE (...) list attached to WITH FILL."""
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse ORDER BY (or ORDER SIBLINGS BY) into an exp.Order; returns `this` unchanged if absent."""
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        """Generic parser for SORT BY / CLUSTER BY / DISTRIBUTE BY-style clauses."""
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        """Parse one ordering term: expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL ...],
        applying the dialect's default null-ordering when none is given explicitly."""
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        """Parse the trailing options of FETCH/TOP: [PERCENT] [ROW|ROWS] [ONLY | WITH TIES]."""
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse LIMIT/TOP (including MySQL's `LIMIT offset, count`) or a FETCH clause."""
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            # `LIMIT a, b` means offset a, count b
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse an OFFSET clause (with optional ROW/ROWS keyword)."""
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        """Look ahead (without consuming) to see if a LIMIT or OFFSET clause follows."""
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        """Parse a ClickHouse-style `BY <exprs>` suffix of LIMIT/OFFSET."""
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        """Parse row-locking clauses: FOR UPDATE/SHARE/KEY SHARE/NO KEY UPDATE,
        LOCK IN SHARE MODE, with optional OF <tables> and NOWAIT/WAIT/SKIP LOCKED."""
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse one UNION/EXCEPT/INTERSECT operation with `this` as the left-hand side."""
        # Speculatively consume an optional join-style side/kind prefix (used by
        # dialects that allow e.g. LEFT/OUTER variants of set operations); we
        # retreat to `start` if no set-operation token actually follows.
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        # Map the matched token onto the corresponding SetOperation node class
        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        # Explicit DISTINCT / ALL wins; otherwise fall back to the dialect's
        # per-operation default. A None default means the dialect requires an
        # explicit quantifier, so we raise.
        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        # BY NAME / STRICT CORRESPONDING / CORRESPONDING all set by_name;
        # a bare CORRESPONDING with no side/kind implies INNER semantics.
        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        # Right-hand side of the set operation; parse_set_operation=False avoids
        # re-entering set-op parsing here (the while loop in _parse_set_operations
        # handles chaining instead).
        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Chain consecutive set operations (UNION/EXCEPT/INTERSECT) onto `this`.

        Optionally hoists trailing query modifiers (e.g. ORDER BY / LIMIT) from the
        right-most operand onto the set operation itself, for dialects where such
        modifiers attach to the whole set operation.
        """
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                # Move each recognized modifier from the RHS operand up to the set op
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        """Parse a full scalar expression, including an optional trailing alias."""
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        """Parse assignment-level expressions (e.g. `x := <expr>`), the lowest
        precedence tier above aliasing."""
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            # A single-part column on the LHS of an assignment is unwrapped to its
            # underlying identifier
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        # OR-level precedence
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        # AND-level precedence
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        # =, <>, etc.
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        # <, <=, >, >=, etc.
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse range-style predicates (BETWEEN, IN, LIKE, IS [NOT] NULL, ...)
        on top of a bitwise-level expression."""
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Wrap a parsed range predicate in NOT (overridable hook for dialects)."""
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Parse the tail of an IS predicate: IS [NOT] DISTINCT FROM, IS [NOT] JSON,
        or IS [NOT] <primary|NULL>. Retreats and returns None if nothing valid follows."""
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            # IS NOT DISTINCT FROM == null-safe equality
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            # IS [NOT] JSON [VALUE|ARRAY|OBJECT|SCALAR] [WITH|WITHOUT UNIQUE [KEYS]]
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        """Parse the RHS of IN: an UNNEST call, a parenthesized/bracketed list or
        subquery, or a bare field reference."""
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            # A single query inside the parens becomes IN (<subquery>)
            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        """Parse [SYMMETRIC | ASYMMETRIC] <low> AND <high> after BETWEEN."""
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional ESCAPE '<char>' clause (e.g. after LIKE)."""
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        """Parse an INTERVAL expression, canonicalizing to INTERVAL '<n>' <unit>.

        When match_interval is False, the INTERVAL keyword itself is optional
        (used when parsing the continuation of a sum of intervals).
        """
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        # Bail out (and retreat) if there's no operand, or if the "operand" is
        # actually the start of an IS predicate (bare, unquoted `IS`)
        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        # The unit may be a function call (e.g. a dialect-specific unit function)
        # or a bare keyword; an AS token means there is no unit here at all
        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        # e.g. INTERVAL '1' DAY TO SECOND (span of units)
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval

    def _parse_bitwise(self) -> t.Optional[exp.Expression]:
        """Parse bitwise-level operators, including ||, ??, and shift operators
        spelled as << / >> token pairs."""
        this = self._parse_term()

        while True:
            if self._match_set(self.BITWISE):
                this = self.expression(
                    self.BITWISE[self._prev.token_type],
                    this=this,
                    expression=self._parse_term(),
                )
            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
                # || as string concatenation; `safe` reflects the dialect's
                # null-handling for concat
                this = self.expression(
                    exp.DPipe,
                    this=this,
                    expression=self._parse_term(),
                    safe=not self.dialect.STRICT_STRING_CONCAT,
                )
            elif self._match(TokenType.DQMARK):
                # ?? null-coalescing operator
                this = self.expression(
                    exp.Coalesce, this=this, expressions=ensure_list(self._parse_term())
                )
            elif self._match_pair(TokenType.LT, TokenType.LT):
                this = self.expression(
                    exp.BitwiseLeftShift, this=this, expression=self._parse_term()
                )
            elif self._match_pair(TokenType.GT, TokenType.GT):
                this = self.expression(
                    exp.BitwiseRightShift, this=this, expression=self._parse_term()
                )
            else:
                break

        return this

    def _parse_term(self) -> t.Optional[exp.Expression]:
        """Parse additive-level operators (self.TERM), with special post-processing
        for COLLATE expressions."""
        this = self._parse_factor()

        while self._match_set(self.TERM):
            klass = self.TERM[self._prev.token_type]
            comments = self._prev_comments
            expression = self._parse_factor()

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Collate):
                expr = this.expression

                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
                # fallback to Identifier / Var
                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
                    ident = expr.this
                    if isinstance(ident, exp.Identifier):
                        this.set("expression", ident if ident.quoted else exp.var(ident.name))

        return this

    def _parse_factor(self) -> t.Optional[exp.Expression]:
        """Parse multiplicative-level operators (self.FACTOR), marking divisions
        with the dialect's typed/safe division semantics."""
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            # A word-operator (e.g. DIV) followed by nothing parseable was likely
            # an identifier, not an operator — give the token back
            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        # Exponentiation tier, only active for dialects that define self.EXPONENT
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        """Parse unary prefix operators, falling through to typed expressions."""
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse a typed expression: an interval, a type literal / cast constructor
        (e.g. DATE '2020-01-01'), or fall back to a column expression."""
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                # <type> '<literal>' — e.g. DATE '2020-01-01' — becomes a cast,
                # possibly through a dialect-specific TYPE_LITERAL_PARSERS hook
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                # Upgrade TIMESTAMP to TIMESTAMPTZ when the literal clearly
                # carries a time zone (alphabetic or +/- after the time part)
                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

            self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        """Parse one parameter of a parameterized type, e.g. the 38 in DECIMAL(38, 0),
        optionally followed by a trailing keyword (e.g. CHAR in VARCHAR(10 CHAR))."""
        this = self._parse_type()
        if not this:
            return None

        # A bare, table-less column here is really a keyword-like size var (e.g. MAX)
        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        """Build a (possibly dot-qualified) user-defined type from an identifier,
        consuming any `.part` segments that follow."""
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)

    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a data type, including nested/parameterized forms.

        Args:
            check_func: treat a type name followed by a string literal cautiously,
                retreating if it could actually be a function call.
            schema: whether we're parsing inside a schema definition (affects
                fixed-size array handling further down).
            allow_identifiers: allow a plain identifier to be re-tokenized as a
                type name, or treated as a user-defined type when supported.
        """
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier: a single resulting type token means
                # it was a type name in disguise (e.g. quoted or dialect-specific)
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # Nullable(T) unwraps to T with the nullable flag set
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            # Having seen (<params>), this could still be a function call —
            # resolved by the check_func peek further down
            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        # Angle-bracketed nested types, e.g. ARRAY<INT> / STRUCT<a INT>
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Inline constructor values, e.g. STRUCT<a INT>(1)
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # WITH/WITHOUT TIME ZONE suffixes settle both the concrete type and
            # rule out the function-call interpretation
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        # If a string follows e.g. FOO(...), FOO is likely a function, not a
        # type — retreat entirely and let the function parser handle it
        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Give the dialect a final chance to rewrite the parsed type
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this

    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        """Parse one STRUCT field: `<name>[:] <type>` (or just a type), producing a
        column definition. Retreats to plain type parsing when a type was required
        but a non-type was parsed."""
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Attach an optional AT TIME ZONE <zone> clause to `this`."""
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        """Parse a (possibly qualified) column and any trailing column operators."""
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        # Oracle-style (+) outer-join marker
        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        """Parse the base of a column reference, wrapping identifiers in Column."""
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            # VALUES not followed by parens is an identifier here, not the keyword
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this

    def _parse_colon_as_variant_extract(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Parse Snowflake/Databricks-style `col:path.to.field` VARIANT extraction
        into a JSONExtract, re-attaching any trailing :: casts afterwards."""
        casts = []
        json_path = []
        escape = None

        while self._match(TokenType.COLON):
            start_index = self._index

            # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True
            path = self._parse_column_ops(
                self._parse_field(any_token=True, tokens=(TokenType.SELECT,))
            )

            # The cast :: operator has a lower precedence than the extraction operator :, so
            # we rearrange the AST appropriately to avoid casting the JSON path
            while isinstance(path, exp.Cast):
                casts.append(path.to)
                path = path.this

            if casts:
                # Recover the raw SQL of the path up to (but excluding) the ::
                dcolon_offset = next(
                    i
                    for i, t in enumerate(self._tokens[start_index:])
                    if t.token_type == TokenType.DCOLON
                )
                end_token = self._tokens[start_index + dcolon_offset - 1]
            else:
                end_token = self._prev

            if path:
                # Escape single quotes from Snowflake's colon extraction (e.g. col:"a'b") as
                # it'll roundtrip to a string literal in GET_PATH
                if isinstance(path, exp.Identifier) and path.quoted:
                    escape = True

                json_path.append(self._find_sql(self._tokens[start_index], end_token))

        # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while
        # Databricks transforms it back to the colon/dot notation
        if json_path:
            json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path)))

            if json_path_expr:
                json_path_expr.set("escape", escape)

            this = self.expression(
                exp.JSONExtract,
                this=this,
                expression=json_path_expr,
                variant_extract=True,
                requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
            )

        # Re-apply the casts that were peeled off the path, innermost first
        while casts:
            this = self.expression(exp.Cast, this=this, to=casts.pop())

        return this

    def _parse_dcolon(self) -> t.Optional[exp.Expression]:
        # Target type of a :: cast
        return self._parse_types()

    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Apply trailing column operators (dots, :: casts, brackets, dialect
        operators) to `this`, restructuring qualified references as needed."""
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                # e.g. ::<type> — the operand is a data type, not a field
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift qualifiers one level: table -> db, db -> catalog, and the
                # new field becomes the column name
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        """Parse a parenthesized expression: a subquery, a tuple, or a plain
        Paren-wrapped expression. Returns None if no '(' is present."""
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            # () — empty tuple
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            # Fold any set operations chained onto the subquery before wrapping
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        """Parse a primary expression: literals (with implicit adjacent-string
        concatenation), leading-dot numbers like `.5`, or a parenthesized expression."""
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                # Adjacent string literals concatenate: 'a' 'b' -> CONCAT('a', 'b')
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            # .5 -> 0.5
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        """Parse a field: a primary, a function call, or an identifier/var.

        When anonymous_func is set, function parsing is attempted before primary
        parsing (so keyword-like names can still resolve as anonymous functions).
        """
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
anonymous=anonymous_func, any_token=any_token 5775 ) 5776 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5777 5778 def _parse_function( 5779 self, 5780 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5781 anonymous: bool = False, 5782 optional_parens: bool = True, 5783 any_token: bool = False, 5784 ) -> t.Optional[exp.Expression]: 5785 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5786 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5787 fn_syntax = False 5788 if ( 5789 self._match(TokenType.L_BRACE, advance=False) 5790 and self._next 5791 and self._next.text.upper() == "FN" 5792 ): 5793 self._advance(2) 5794 fn_syntax = True 5795 5796 func = self._parse_function_call( 5797 functions=functions, 5798 anonymous=anonymous, 5799 optional_parens=optional_parens, 5800 any_token=any_token, 5801 ) 5802 5803 if fn_syntax: 5804 self._match(TokenType.R_BRACE) 5805 5806 return func 5807 5808 def _parse_function_call( 5809 self, 5810 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5811 anonymous: bool = False, 5812 optional_parens: bool = True, 5813 any_token: bool = False, 5814 ) -> t.Optional[exp.Expression]: 5815 if not self._curr: 5816 return None 5817 5818 comments = self._curr.comments 5819 prev = self._prev 5820 token = self._curr 5821 token_type = self._curr.token_type 5822 this = self._curr.text 5823 upper = this.upper() 5824 5825 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5826 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5827 self._advance() 5828 return self._parse_window(parser(self)) 5829 5830 if not self._next or self._next.token_type != TokenType.L_PAREN: 5831 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5832 self._advance() 5833 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5834 5835 return None 5836 5837 if any_token: 5838 if token_type in self.RESERVED_TOKENS: 5839 return None 5840 elif 
token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # Builders that declare a `dialect` parameter get the active dialect passed in
                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    # Remember the function name exactly as it was written by the user
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        # Default no-op; presumably a hook for dialect subclasses to rewrite a
        # positional argument into a PropertyEQ — verify against dialect overrides.
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        """Normalize key-value style arguments (aliases, EQ-likes) into exp.PropertyEQ nodes."""
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                # Unwrap a Column key down to its underlying identifier
                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()

    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a UDF reference: a (possibly schema-qualified) name with an optional parameter list."""
        this = self._parse_table_parts(schema=True)

        # No parameter list: just return the bare name
        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction,
this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        """Parse a charset introducer (e.g. _utf8'...'); falls back to a plain identifier."""
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        """Parse a session parameter reference, optionally qualified as <kind>.<name>."""
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        """Parse a lambda expression if one follows; otherwise parse a regular argument expression."""
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        # Only a lambda if one of the LAMBDAS operator tokens (e.g. ->) follows the args
        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        # Not a lambda: rewind and parse as an ordinary expression instead
        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )

    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse a parenthesized column/constraint list into exp.Schema, if present."""
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this

        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        """Parse a column definition: optional type followed by any column constraints."""
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            # Computed column, e.g. `col ALIAS <expr>` / `col MATERIALIZED <expr>`
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and
self._prev.text.upper() == "STORED",
                    ),
                )
            )

        # Collect any remaining column constraints until none match
        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        # Neither a type nor constraints: this was just a bare identifier
        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)

    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        """Parse AUTOINCREMENT options; with explicit start/increment it becomes an identity constraint."""
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            # Form: AUTOINCREMENT(start, increment)
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            # Form: AUTOINCREMENT START <n> INCREMENT <n> [ORDER | NOORDER]
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            # Unconsume the token that led us here
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        """Parse a COMPRESS constraint with either a wrapped list or a single expression."""
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse GENERATED {ALWAYS | BY DEFAULT} AS {IDENTITY(...) | ROW ... | (<expr>)}."""
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            # GENERATED ... AS ROW {START | END} [HIDDEN]
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # Parenthesized expression form: GENERATED ... AS (<expr>)
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Bare numeric form: IDENTITY(start, increment)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        """Parse the constraint following a NOT keyword (NULL, CASESPECIFIC, FOR REPLICATION)."""
        if self._match_text_seq("NULL"):
            return
self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        """Parse one column constraint, optionally named via a leading CONSTRAINT <name>."""
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        # A `WITH <procedure option>` sequence must not be mistaken for a constraint
        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this

    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        """Parse a table-level constraint; unnamed ones are delegated to _parse_unnamed_constraint."""
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        """Collect consecutive unnamed constraints (or constraint-like function calls)."""
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single unnamed constraint from the allowed keyword set."""
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        """Parse a UNIQUE [KEY | INDEX] constraint with its optional modifiers."""
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        """Collect key constraint options as plain strings (ON <event> <action>, plus keyword options)."""
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        """Parse a REFERENCES clause (target table and key constraint options)."""
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference,
this=this, expressions=expressions, options=options)

    def _parse_foreign_key(self) -> exp.ForeignKey:
        """Parse FOREIGN KEY: optional column list, REFERENCES clause and ON DELETE/UPDATE actions."""
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        # Accumulate ON DELETE / ON UPDATE actions keyed by event
        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        """Parse PERIOD FOR SYSTEM_TIME (start_col, end_col)."""
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            # Unconsume the token that led us here
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        """Parse PRIMARY KEY as either a column constraint or a table-level key with a column list."""
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        # Without a parenthesized column list this is a column-level constraint
        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        """Parse [...] subscripts and {...} literals (structs, maps, ODBC datetimes)."""
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            # Detect a preceding MAP keyword to allow arbitrary expressions as keys
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return
self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            # Bare bracket literal, e.g. [1, 2, 3] -> array constructor
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            # Subscript access: adjust indices by the dialect's index offset
            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        # Recurse to consume chained subscripts, e.g. x[0][1]
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        """Parse a CASE [<operand>] WHEN ... THEN ... [ELSE ...] END expression."""
        if self._match(TokenType.DOT, advance=False):
            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
            self._retreat(self._index - 1)
            return None

        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            # Handle "ELSE interval END" being mis-parsed as an Interval named END
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )

    def _parse_if(self) -> t.Optional[exp.Expression]:
        """Parse IF as either a function call IF(...) or the keyword form IF ... THEN ... END."""
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            # A leading no-paren IF can be a command in some dialects
            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        """Parse NEXT VALUE FOR <sequence> [OVER (ORDER BY ...)]."""
        if not self._match_text_seq("VALUE", "FOR"):
            # Unconsume the token that led us here
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        """Parse EXTRACT(<unit> FROM <expr>); a comma is accepted in place of FROM."""
        this = self._parse_function()
or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        """Parse GAP_FILL(TABLE <table>, <args...>)."""
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        """Parse CAST(<expr> AS <type> [FORMAT <fmt>] [DEFAULT <expr> ON CONVERSION ERROR])."""
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                # Comma instead of AS: cast-to-string-type form
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)

            if to.this in exp.DataType.TEMPORAL_TYPES:
                # A FORMAT-ed cast to a temporal type is canonicalized to StrToDate/StrToTime
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            # An identifier target type is treated as a user-defined type
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )

    def _parse_string_agg(self) -> exp.GroupConcat:
        """Parse STRING_AGG / LISTAGG variants into a canonical exp.GroupConcat."""
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ...
]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        """Parse CONVERT(<expr> USING <charset>) or CONVERT(<expr>, <type>)."""
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        """Parse XMLTABLE([XMLNAMESPACES(...),] <xquery> [PASSING ...] [COLUMNS ...])."""
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        """Parse a comma-separated list of XML namespace declarations."""
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))

            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        """With fewer than 3 args this is a charset DECODE; otherwise a CASE-like DECODE."""
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        """Parse [KEY] <key> <sep> [VALUE] <value> for JSON object constructors."""
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None

        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        """Wrap `this` in exp.FormatJson when a trailing FORMAT JSON is present."""
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g.
JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        """Parse JSON_OBJECT / JSON_OBJECTAGG arguments and modifiers."""
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        # WITH/WITHOUT UNIQUE [KEYS]
        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        """Parse a COLUMNS (...) schema for JSON_TABLE-style constructs."""
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        """Parse JSON_TABLE(<doc> [, <path>] [ON ERROR/EMPTY handling] COLUMNS (...))."""
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and
self._parse_string() 6860 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6861 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6862 schema = self._parse_json_schema() 6863 6864 return exp.JSONTable( 6865 this=this, 6866 schema=schema, 6867 path=path, 6868 error_handling=error_handling, 6869 empty_handling=empty_handling, 6870 ) 6871 6872 def _parse_match_against(self) -> exp.MatchAgainst: 6873 if self._match_text_seq("TABLE"): 6874 # parse SingleStore MATCH(TABLE ...) syntax 6875 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 6876 expressions = [] 6877 table = self._parse_table() 6878 if table: 6879 expressions = [table] 6880 else: 6881 expressions = self._parse_csv(self._parse_column) 6882 6883 self._match_text_seq(")", "AGAINST", "(") 6884 6885 this = self._parse_string() 6886 6887 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 6888 modifier = "IN NATURAL LANGUAGE MODE" 6889 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6890 modifier = f"{modifier} WITH QUERY EXPANSION" 6891 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 6892 modifier = "IN BOOLEAN MODE" 6893 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 6894 modifier = "WITH QUERY EXPANSION" 6895 else: 6896 modifier = None 6897 6898 return self.expression( 6899 exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier 6900 ) 6901 6902 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 6903 def _parse_open_json(self) -> exp.OpenJSON: 6904 this = self._parse_bitwise() 6905 path = self._match(TokenType.COMMA) and self._parse_string() 6906 6907 def _parse_open_json_column_def() -> exp.OpenJSONColumnDef: 6908 this = self._parse_field(any_token=True) 6909 kind = self._parse_types() 6910 path = self._parse_string() 6911 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 6912 6913 return self.expression( 6914 
exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json 6915 ) 6916 6917 expressions = None 6918 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 6919 self._match_l_paren() 6920 expressions = self._parse_csv(_parse_open_json_column_def) 6921 6922 return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions) 6923 6924 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 6925 args = self._parse_csv(self._parse_bitwise) 6926 6927 if self._match(TokenType.IN): 6928 return self.expression( 6929 exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0) 6930 ) 6931 6932 if haystack_first: 6933 haystack = seq_get(args, 0) 6934 needle = seq_get(args, 1) 6935 else: 6936 haystack = seq_get(args, 1) 6937 needle = seq_get(args, 0) 6938 6939 return self.expression( 6940 exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2) 6941 ) 6942 6943 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 6944 args = self._parse_csv(self._parse_table) 6945 return exp.JoinHint(this=func_name.upper(), expressions=args) 6946 6947 def _parse_substring(self) -> exp.Substring: 6948 # Postgres supports the form: substring(string [from int] [for int]) 6949 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 6950 6951 args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise)) 6952 6953 if self._match(TokenType.FROM): 6954 args.append(self._parse_bitwise()) 6955 if self._match(TokenType.FOR): 6956 if len(args) == 1: 6957 args.append(exp.Literal.number(1)) 6958 args.append(self._parse_bitwise()) 6959 6960 return self.validate_expression(exp.Substring.from_arg_list(args), args) 6961 6962 def _parse_trim(self) -> exp.Trim: 6963 # https://www.w3resource.com/sql/character-functions/trim.php 6964 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 6965 6966 position = None 6967 collation = None 6968 expression = None 6969 6970 if 
self._match_texts(self.TRIM_TYPES): 6971 position = self._prev.text.upper() 6972 6973 this = self._parse_bitwise() 6974 if self._match_set((TokenType.FROM, TokenType.COMMA)): 6975 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 6976 expression = self._parse_bitwise() 6977 6978 if invert_order: 6979 this, expression = expression, this 6980 6981 if self._match(TokenType.COLLATE): 6982 collation = self._parse_bitwise() 6983 6984 return self.expression( 6985 exp.Trim, this=this, position=position, expression=expression, collation=collation 6986 ) 6987 6988 def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]: 6989 return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window) 6990 6991 def _parse_named_window(self) -> t.Optional[exp.Expression]: 6992 return self._parse_window(self._parse_id_var(), alias=True) 6993 6994 def _parse_respect_or_ignore_nulls( 6995 self, this: t.Optional[exp.Expression] 6996 ) -> t.Optional[exp.Expression]: 6997 if self._match_text_seq("IGNORE", "NULLS"): 6998 return self.expression(exp.IgnoreNulls, this=this) 6999 if self._match_text_seq("RESPECT", "NULLS"): 7000 return self.expression(exp.RespectNulls, this=this) 7001 return this 7002 7003 def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 7004 if self._match(TokenType.HAVING): 7005 self._match_texts(("MAX", "MIN")) 7006 max = self._prev.text.upper() != "MIN" 7007 return self.expression( 7008 exp.HavingMax, this=this, expression=self._parse_column(), max=max 7009 ) 7010 7011 return this 7012 7013 def _parse_window( 7014 self, this: t.Optional[exp.Expression], alias: bool = False 7015 ) -> t.Optional[exp.Expression]: 7016 func = this 7017 comments = func.comments if isinstance(func, exp.Expression) else None 7018 7019 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        """Parse window-function decorations around `this`: WITHIN GROUP,
        FILTER (WHERE ...), IGNORE/RESPECT NULLS, and OVER (...) / named
        window specs. With ``alias=True``, parses a named window definition
        (e.g. BigQuery's ``WINDOW w AS (...)``) instead of an OVER clause.
        Returns `this` unchanged when no window syntax follows.
        """
        func = this
        # Remember the function's comments so they can be moved onto the
        # resulting Window node below.
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                # Hoist the IGNORE/RESPECT NULLS wrapper from inside the
                # aggregate's argument to wrap the aggregate itself.
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No OVER-like keyword follows: there is no window clause here.
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            # Comments now live on the Window node; drop them from the func.
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            # OVER <window_name> form: reference to a named window.
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            # ROWS/RANGE frame: [BETWEEN] <start> [AND <end>] [EXCLUDE ...]
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end.get("value"),
                end_side=end.get("side"),
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
7123 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 7124 return self._parse_window(window, alias=alias) 7125 7126 return window 7127 7128 def _parse_partition_and_order( 7129 self, 7130 ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]: 7131 return self._parse_partition_by(), self._parse_order() 7132 7133 def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]: 7134 self._match(TokenType.BETWEEN) 7135 7136 return { 7137 "value": ( 7138 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 7139 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 7140 or self._parse_type() 7141 ), 7142 "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text, 7143 } 7144 7145 def _parse_alias( 7146 self, this: t.Optional[exp.Expression], explicit: bool = False 7147 ) -> t.Optional[exp.Expression]: 7148 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 7149 # so this section tries to parse the clause version and if it fails, it treats the token 7150 # as an identifier (alias) 7151 if self._can_parse_limit_or_offset(): 7152 return this 7153 7154 any_token = self._match(TokenType.ALIAS) 7155 comments = self._prev_comments or [] 7156 7157 if explicit and not any_token: 7158 return this 7159 7160 if self._match(TokenType.L_PAREN): 7161 aliases = self.expression( 7162 exp.Aliases, 7163 comments=comments, 7164 this=this, 7165 expressions=self._parse_csv(lambda: self._parse_id_var(any_token)), 7166 ) 7167 self._match_r_paren(aliases) 7168 return aliases 7169 7170 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 7171 self.STRING_ALIASES and self._parse_string_as_identifier() 7172 ) 7173 7174 if alias: 7175 comments.extend(alias.pop_comments()) 7176 this = self.expression(exp.Alias, comments=comments, this=this, alias=alias) 7177 column = this.this 7178 7179 # Moves the comment next to the alias in `expr /* comment */ AS alias` 7180 if not this.comments and column 
and column.comments: 7181 this.comments = column.pop_comments() 7182 7183 return this 7184 7185 def _parse_id_var( 7186 self, 7187 any_token: bool = True, 7188 tokens: t.Optional[t.Collection[TokenType]] = None, 7189 ) -> t.Optional[exp.Expression]: 7190 expression = self._parse_identifier() 7191 if not expression and ( 7192 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 7193 ): 7194 quoted = self._prev.token_type == TokenType.STRING 7195 expression = self._identifier_expression(quoted=quoted) 7196 7197 return expression 7198 7199 def _parse_string(self) -> t.Optional[exp.Expression]: 7200 if self._match_set(self.STRING_PARSERS): 7201 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 7202 return self._parse_placeholder() 7203 7204 def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]: 7205 output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True) 7206 if output: 7207 output.update_positions(self._prev) 7208 return output 7209 7210 def _parse_number(self) -> t.Optional[exp.Expression]: 7211 if self._match_set(self.NUMERIC_PARSERS): 7212 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 7213 return self._parse_placeholder() 7214 7215 def _parse_identifier(self) -> t.Optional[exp.Expression]: 7216 if self._match(TokenType.IDENTIFIER): 7217 return self._identifier_expression(quoted=True) 7218 return self._parse_placeholder() 7219 7220 def _parse_var( 7221 self, 7222 any_token: bool = False, 7223 tokens: t.Optional[t.Collection[TokenType]] = None, 7224 upper: bool = False, 7225 ) -> t.Optional[exp.Expression]: 7226 if ( 7227 (any_token and self._advance_any()) 7228 or self._match(TokenType.VAR) 7229 or (self._match_set(tokens) if tokens else False) 7230 ): 7231 return self.expression( 7232 exp.Var, this=self._prev.text.upper() if upper else self._prev.text 7233 ) 7234 return self._parse_placeholder() 7235 7236 def _advance_any(self, ignore_reserved: 
bool = False) -> t.Optional[Token]: 7237 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 7238 self._advance() 7239 return self._prev 7240 return None 7241 7242 def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]: 7243 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 7244 7245 def _parse_primary_or_var(self) -> t.Optional[exp.Expression]: 7246 return self._parse_primary() or self._parse_var(any_token=True) 7247 7248 def _parse_null(self) -> t.Optional[exp.Expression]: 7249 if self._match_set(self.NULL_TOKENS): 7250 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 7251 return self._parse_placeholder() 7252 7253 def _parse_boolean(self) -> t.Optional[exp.Expression]: 7254 if self._match(TokenType.TRUE): 7255 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 7256 if self._match(TokenType.FALSE): 7257 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 7258 return self._parse_placeholder() 7259 7260 def _parse_star(self) -> t.Optional[exp.Expression]: 7261 if self._match(TokenType.STAR): 7262 return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 7263 return self._parse_placeholder() 7264 7265 def _parse_parameter(self) -> exp.Parameter: 7266 this = self._parse_identifier() or self._parse_primary_or_var() 7267 return self.expression(exp.Parameter, this=this) 7268 7269 def _parse_placeholder(self) -> t.Optional[exp.Expression]: 7270 if self._match_set(self.PLACEHOLDER_PARSERS): 7271 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 7272 if placeholder: 7273 return placeholder 7274 self._advance(-1) 7275 return None 7276 7277 def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]: 7278 if not self._match_texts(keywords): 7279 return None 7280 if self._match(TokenType.L_PAREN, advance=False): 7281 return self._parse_wrapped_csv(self._parse_expression) 7282 7283 expression = 
self._parse_expression() 7284 return [expression] if expression else None 7285 7286 def _parse_csv( 7287 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA 7288 ) -> t.List[exp.Expression]: 7289 parse_result = parse_method() 7290 items = [parse_result] if parse_result is not None else [] 7291 7292 while self._match(sep): 7293 self._add_comments(parse_result) 7294 parse_result = parse_method() 7295 if parse_result is not None: 7296 items.append(parse_result) 7297 7298 return items 7299 7300 def _parse_tokens( 7301 self, parse_method: t.Callable, expressions: t.Dict 7302 ) -> t.Optional[exp.Expression]: 7303 this = parse_method() 7304 7305 while self._match_set(expressions): 7306 this = self.expression( 7307 expressions[self._prev.token_type], 7308 this=this, 7309 comments=self._prev_comments, 7310 expression=parse_method(), 7311 ) 7312 7313 return this 7314 7315 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 7316 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 7317 7318 def _parse_wrapped_csv( 7319 self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False 7320 ) -> t.List[exp.Expression]: 7321 return self._parse_wrapped( 7322 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 7323 ) 7324 7325 def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any: 7326 wrapped = self._match(TokenType.L_PAREN) 7327 if not wrapped and not optional: 7328 self.raise_error("Expecting (") 7329 parse_result = parse_method() 7330 if wrapped: 7331 self._match_r_paren() 7332 return parse_result 7333 7334 def _parse_expressions(self) -> t.List[exp.Expression]: 7335 return self._parse_csv(self._parse_expression) 7336 7337 def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]: 7338 return ( 7339 self._parse_set_operations( 7340 self._parse_alias(self._parse_assignment(), explicit=True) 7341 if alias 7342 else 
self._parse_assignment() 7343 ) 7344 or self._parse_select() 7345 ) 7346 7347 def _parse_ddl_select(self) -> t.Optional[exp.Expression]: 7348 return self._parse_query_modifiers( 7349 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 7350 ) 7351 7352 def _parse_transaction(self) -> exp.Transaction | exp.Command: 7353 this = None 7354 if self._match_texts(self.TRANSACTION_KIND): 7355 this = self._prev.text 7356 7357 self._match_texts(("TRANSACTION", "WORK")) 7358 7359 modes = [] 7360 while True: 7361 mode = [] 7362 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 7363 mode.append(self._prev.text) 7364 7365 if mode: 7366 modes.append(" ".join(mode)) 7367 if not self._match(TokenType.COMMA): 7368 break 7369 7370 return self.expression(exp.Transaction, this=this, modes=modes) 7371 7372 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 7373 chain = None 7374 savepoint = None 7375 is_rollback = self._prev.token_type == TokenType.ROLLBACK 7376 7377 self._match_texts(("TRANSACTION", "WORK")) 7378 7379 if self._match_text_seq("TO"): 7380 self._match_text_seq("SAVEPOINT") 7381 savepoint = self._parse_id_var() 7382 7383 if self._match(TokenType.AND): 7384 chain = not self._match_text_seq("NO") 7385 self._match_text_seq("CHAIN") 7386 7387 if is_rollback: 7388 return self.expression(exp.Rollback, savepoint=savepoint) 7389 7390 return self.expression(exp.Commit, chain=chain) 7391 7392 def _parse_refresh(self) -> exp.Refresh: 7393 self._match(TokenType.TABLE) 7394 return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table()) 7395 7396 def _parse_column_def_with_exists(self): 7397 start = self._index 7398 self._match(TokenType.COLUMN) 7399 7400 exists_column = self._parse_exists(not_=True) 7401 expression = self._parse_field_def() 7402 7403 if not isinstance(expression, exp.ColumnDef): 7404 self._retreat(start) 7405 return None 7406 7407 expression.set("exists", exists_column) 7408 7409 
return expression 7410 7411 def _parse_add_column(self) -> t.Optional[exp.ColumnDef]: 7412 if not self._prev.text.upper() == "ADD": 7413 return None 7414 7415 expression = self._parse_column_def_with_exists() 7416 if not expression: 7417 return None 7418 7419 # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns 7420 if self._match_texts(("FIRST", "AFTER")): 7421 position = self._prev.text 7422 column_position = self.expression( 7423 exp.ColumnPosition, this=self._parse_column(), position=position 7424 ) 7425 expression.set("position", column_position) 7426 7427 return expression 7428 7429 def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]: 7430 drop = self._match(TokenType.DROP) and self._parse_drop() 7431 if drop and not isinstance(drop, exp.Command): 7432 drop.set("kind", drop.args.get("kind", "COLUMN")) 7433 return drop 7434 7435 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 7436 def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition: 7437 return self.expression( 7438 exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists 7439 ) 7440 7441 def _parse_alter_table_add(self) -> t.List[exp.Expression]: 7442 def _parse_add_alteration() -> t.Optional[exp.Expression]: 7443 self._match_text_seq("ADD") 7444 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 7445 return self.expression( 7446 exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint) 7447 ) 7448 7449 column_def = self._parse_add_column() 7450 if isinstance(column_def, exp.ColumnDef): 7451 return column_def 7452 7453 exists = self._parse_exists(not_=True) 7454 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 7455 return self.expression( 7456 exp.AddPartition, 7457 exists=exists, 7458 this=self._parse_field(any_token=True), 7459 location=self._match_text_seq("LOCATION", advance=False) 7460 and 
    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        """Parse the action after ALTER TABLE ... ALTER.

        Dispatches to ALTER_ALTER_PARSERS when a known keyword follows;
        otherwise parses an ALTER [COLUMN] action (DROP/SET DEFAULT, COMMENT,
        NOT NULL, visibility, or a type change). The order of the checks below
        matters: multi-token forms must be tried before the bare type-change
        fallback.
        """
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        # Fallback: [SET DATA] [TYPE] <dtype> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
exp.AlterColumn, 7517 this=column, 7518 dtype=self._parse_types(), 7519 collate=self._match(TokenType.COLLATE) and self._parse_term(), 7520 using=self._match(TokenType.USING) and self._parse_assignment(), 7521 ) 7522 7523 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 7524 if self._match_texts(("ALL", "EVEN", "AUTO")): 7525 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7526 7527 self._match_text_seq("KEY", "DISTKEY") 7528 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7529 7530 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7531 if compound: 7532 self._match_text_seq("SORTKEY") 7533 7534 if self._match(TokenType.L_PAREN, advance=False): 7535 return self.expression( 7536 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7537 ) 7538 7539 self._match_texts(("AUTO", "NONE")) 7540 return self.expression( 7541 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7542 ) 7543 7544 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7545 index = self._index - 1 7546 7547 partition_exists = self._parse_exists() 7548 if self._match(TokenType.PARTITION, advance=False): 7549 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7550 7551 self._retreat(index) 7552 return self._parse_csv(self._parse_drop_column) 7553 7554 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7555 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7556 exists = self._parse_exists() 7557 old_column = self._parse_column() 7558 to = self._match_text_seq("TO") 7559 new_column = self._parse_column() 7560 7561 if old_column is None or to is None or new_column is None: 7562 return None 7563 7564 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7565 7566 self._match_text_seq("TO") 7567 return 
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse the clause after ALTER TABLE ... SET.

        Tries each known SET variant in order (properties, options, location,
        file formats, tags, ...) and stores the result on a single AlterSet
        node; the final else-branch handles SERDE and generic property lists.
        Branch order matters: earlier, more specific keywords must win.
        """
        alter_set = self.expression(exp.AlterSet)

        # SET (...) / SET TABLE PROPERTIES (...): a wrapped assignment list.
        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            # Fallback: optional SERDE plus an optional wrapped property list.
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement (TABLE/VIEW/SESSION/...).

        Falls back to a generic exp.Command whenever the alterable kind or the
        action is not recognized, or when tokens remain unparsed afterwards.
        """
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        # ALTER SESSION has no target table or table-level modifiers.
        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        # The token just consumed selects the action parser (ADD, DROP, SET, ...).
        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            # Only build an exp.Alter if everything was consumed; otherwise
            # fall through to the command fallback below.
            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        """Parse an ANALYZE statement across dialects.

        Collects style options, determines the analyze target (TABLE, INDEX,
        TABLES FROM/IN, DATABASE, CLUSTER, or a bare table), then optional
        partition, StarRocks sync mode, inner expression and properties.
        Falls back to a generic command when an unparsable PARTITION-like
        clause follows.
        """
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        # Peek at the next token as the tentative analyze kind.
        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            # _index - 2 points back at the SYNC/ASYNC token just consumed.
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )
    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        """Parse the [DELTA] STATISTICS clause of ANALYZE (Spark/Databricks)."""
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        """Parse Oracle's ANALYZE ... VALIDATE REF UPDATE / VALIDATE STRUCTURE."""
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        """Parse e.g. ESTIMATE/COMPUTE ... COLUMNS; `_prev` holds the leading keyword."""
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        """Parse DELETE [SYSTEM] STATISTICS."""
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        """Parse LIST CHAINED ROWS [INTO ...]."""
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        """Parse UPDATE/DROP HISTOGRAM ON col, ... [WITH ...] [USING DATA ...]."""
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            # MANUAL/AUTO must be followed by UPDATE (peeked, then consumed)
            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )
self._advance() 7824 elif self._match_text_seq("USING", "DATA"): 7825 expression = self.expression(exp.UsingData, this=self._parse_string()) 7826 7827 return self.expression( 7828 exp.AnalyzeHistogram, 7829 this=this, 7830 expressions=expressions, 7831 expression=expression, 7832 update_options=update_options, 7833 ) 7834 7835 def _parse_merge(self) -> exp.Merge: 7836 self._match(TokenType.INTO) 7837 target = self._parse_table() 7838 7839 if target and self._match(TokenType.ALIAS, advance=False): 7840 target.set("alias", self._parse_table_alias()) 7841 7842 self._match(TokenType.USING) 7843 using = self._parse_table() 7844 7845 self._match(TokenType.ON) 7846 on = self._parse_assignment() 7847 7848 return self.expression( 7849 exp.Merge, 7850 this=target, 7851 using=using, 7852 on=on, 7853 whens=self._parse_when_matched(), 7854 returning=self._parse_returning(), 7855 ) 7856 7857 def _parse_when_matched(self) -> exp.Whens: 7858 whens = [] 7859 7860 while self._match(TokenType.WHEN): 7861 matched = not self._match(TokenType.NOT) 7862 self._match_text_seq("MATCHED") 7863 source = ( 7864 False 7865 if self._match_text_seq("BY", "TARGET") 7866 else self._match_text_seq("BY", "SOURCE") 7867 ) 7868 condition = self._parse_assignment() if self._match(TokenType.AND) else None 7869 7870 self._match(TokenType.THEN) 7871 7872 if self._match(TokenType.INSERT): 7873 this = self._parse_star() 7874 if this: 7875 then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this) 7876 else: 7877 then = self.expression( 7878 exp.Insert, 7879 this=exp.var("ROW") 7880 if self._match_text_seq("ROW") 7881 else self._parse_value(values=False), 7882 expression=self._match_text_seq("VALUES") and self._parse_value(), 7883 ) 7884 elif self._match(TokenType.UPDATE): 7885 expressions = self._parse_star() 7886 if expressions: 7887 then = self.expression(exp.Update, expressions=expressions) 7888 else: 7889 then = self.expression( 7890 exp.Update, 7891 expressions=self._match(TokenType.SET) 
    def _parse_show(self) -> t.Optional[exp.Expression]:
        """Parse SHOW via dialect-registered sub-parsers, else fall back to Command."""
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        """Parse a single SET item, e.g. `x = 1` or `x TO 1`; returns None on no match."""
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            # Not an assignment — rewind so the caller can try something else
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        """Parse SET [GLOBAL] TRANSACTION <characteristics, ...>."""
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        """Parse one SET item using a registered sub-parser or a plain assignment."""
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        """Parse a SET statement; falls back to Command if tokens remain unconsumed."""
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        """Parse a (possibly multi-word) option keyword from `options` into an exp.Var.

        Each value in `options` lists the allowed keyword continuations; an empty
        sequence means the bare option is valid on its own.
        """
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            # No continuation matched; continuations is None means unknown option
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        """Consume the rest of the statement verbatim as a raw exp.Command."""
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        """Parse a ClickHouse-style dictionary property: NAME(KIND(k v, ...))."""
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )
self._match_r_paren() 8019 8020 return self.expression( 8021 exp.DictProperty, 8022 this=this, 8023 kind=kind.this if kind else None, 8024 settings=settings, 8025 ) 8026 8027 def _parse_dict_range(self, this: str) -> exp.DictRange: 8028 self._match_l_paren() 8029 has_min = self._match_text_seq("MIN") 8030 if has_min: 8031 min = self._parse_var() or self._parse_primary() 8032 self._match_text_seq("MAX") 8033 max = self._parse_var() or self._parse_primary() 8034 else: 8035 max = self._parse_var() or self._parse_primary() 8036 min = exp.Literal.number(0) 8037 self._match_r_paren() 8038 return self.expression(exp.DictRange, this=this, min=min, max=max) 8039 8040 def _parse_comprehension( 8041 self, this: t.Optional[exp.Expression] 8042 ) -> t.Optional[exp.Comprehension]: 8043 index = self._index 8044 expression = self._parse_column() 8045 if not self._match(TokenType.IN): 8046 self._retreat(index - 1) 8047 return None 8048 iterator = self._parse_column() 8049 condition = self._parse_assignment() if self._match_text_seq("IF") else None 8050 return self.expression( 8051 exp.Comprehension, 8052 this=this, 8053 expression=expression, 8054 iterator=iterator, 8055 condition=condition, 8056 ) 8057 8058 def _parse_heredoc(self) -> t.Optional[exp.Heredoc]: 8059 if self._match(TokenType.HEREDOC_STRING): 8060 return self.expression(exp.Heredoc, this=self._prev.text) 8061 8062 if not self._match_text_seq("$"): 8063 return None 8064 8065 tags = ["$"] 8066 tag_text = None 8067 8068 if self._is_connected(): 8069 self._advance() 8070 tags.append(self._prev.text.upper()) 8071 else: 8072 self.raise_error("No closing $ found") 8073 8074 if tags[-1] != "$": 8075 if self._is_connected() and self._match_text_seq("$"): 8076 tag_text = tags[-1] 8077 tags.append("$") 8078 else: 8079 self.raise_error("No closing $ found") 8080 8081 heredoc_start = self._curr 8082 8083 while self._curr: 8084 if self._match_text_seq(*tags, advance=False): 8085 this = self._find_sql(heredoc_start, self._prev) 8086 
    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        """Walk `trie` over upcoming tokens to find the longest matching sub-parser.

        Restores the token position when no parser matches.
        """
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        # Match (and optionally consume) a single token of `token_type`;
        # attaches pending comments to `expression` on success.
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        # Match (and optionally consume) one token whose type is in `types`.
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        # Match two consecutive tokens of the given types.
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        # Match one non-string token whose upper-cased text is in `texts`.
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        # Match the exact sequence of keywords in `texts`; rewinds fully on failure
        # (and also when advance=False, which makes this a pure lookahead).
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        """Rewrite columns in `node` that refer to lambda parameters, optionally
        casting them to the parameter's declared type."""
        if not node:
            return node

        # Map parameter name -> cast target (False when untyped)
        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                # Replace the outermost Dot chain that wraps this column
                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        """Parse TRUNCATE [TABLE|DATABASE] ...; disambiguates from TRUNCATE(x, y)."""
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
self._match(TokenType.TABLE) 8237 8238 exists = self._parse_exists(not_=False) 8239 8240 expressions = self._parse_csv( 8241 lambda: self._parse_table(schema=True, is_db_reference=is_database) 8242 ) 8243 8244 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8245 8246 if self._match_text_seq("RESTART", "IDENTITY"): 8247 identity = "RESTART" 8248 elif self._match_text_seq("CONTINUE", "IDENTITY"): 8249 identity = "CONTINUE" 8250 else: 8251 identity = None 8252 8253 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 8254 option = self._prev.text 8255 else: 8256 option = None 8257 8258 partition = self._parse_partition() 8259 8260 # Fallback case 8261 if self._curr: 8262 return self._parse_as_command(start) 8263 8264 return self.expression( 8265 exp.TruncateTable, 8266 expressions=expressions, 8267 is_database=is_database, 8268 exists=exists, 8269 cluster=cluster, 8270 identity=identity, 8271 option=option, 8272 partition=partition, 8273 ) 8274 8275 def _parse_with_operator(self) -> t.Optional[exp.Expression]: 8276 this = self._parse_ordered(self._parse_opclass) 8277 8278 if not self._match(TokenType.WITH): 8279 return this 8280 8281 op = self._parse_var(any_token=True) 8282 8283 return self.expression(exp.WithOperator, this=this, op=op) 8284 8285 def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]: 8286 self._match(TokenType.EQ) 8287 self._match(TokenType.L_PAREN) 8288 8289 opts: t.List[t.Optional[exp.Expression]] = [] 8290 option: exp.Expression | None 8291 while self._curr and not self._match(TokenType.R_PAREN): 8292 if self._match_text_seq("FORMAT_NAME", "="): 8293 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 8294 option = self._parse_format_name() 8295 else: 8296 option = self._parse_property() 8297 8298 if option is None: 8299 self.raise_error("Unable to parse option") 8300 break 8301 8302 opts.append(option) 8303 8304 return opts 8305 8306 def _parse_copy_parameters(self) 
-> t.List[exp.CopyParameter]: 8307 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 8308 8309 options = [] 8310 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 8311 option = self._parse_var(any_token=True) 8312 prev = self._prev.text.upper() 8313 8314 # Different dialects might separate options and values by white space, "=" and "AS" 8315 self._match(TokenType.EQ) 8316 self._match(TokenType.ALIAS) 8317 8318 param = self.expression(exp.CopyParameter, this=option) 8319 8320 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 8321 TokenType.L_PAREN, advance=False 8322 ): 8323 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 8324 param.set("expressions", self._parse_wrapped_options()) 8325 elif prev == "FILE_FORMAT": 8326 # T-SQL's external file format case 8327 param.set("expression", self._parse_field()) 8328 else: 8329 param.set("expression", self._parse_unquoted_field()) 8330 8331 options.append(param) 8332 self._match(sep) 8333 8334 return options 8335 8336 def _parse_credentials(self) -> t.Optional[exp.Credentials]: 8337 expr = self.expression(exp.Credentials) 8338 8339 if self._match_text_seq("STORAGE_INTEGRATION", "="): 8340 expr.set("storage", self._parse_field()) 8341 if self._match_text_seq("CREDENTIALS"): 8342 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 8343 creds = ( 8344 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 8345 ) 8346 expr.set("credentials", creds) 8347 if self._match_text_seq("ENCRYPTION"): 8348 expr.set("encryption", self._parse_wrapped_options()) 8349 if self._match_text_seq("IAM_ROLE"): 8350 expr.set("iam_role", self._parse_field()) 8351 if self._match_text_seq("REGION"): 8352 expr.set("region", self._parse_field()) 8353 8354 return expr 8355 8356 def _parse_file_location(self) -> t.Optional[exp.Expression]: 8357 return self._parse_field() 8358 8359 def _parse_copy(self) -> exp.Copy | exp.Command: 8360 start = 
    def _parse_normalize(self) -> exp.Normalize:
        """Parse NORMALIZE(<string> [, <form>])."""
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        """Parse CEIL/FLOOR(<expr> [, decimals] [TO <unit>])."""
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        """Parse `*` and its modifiers (EXCEPT/EXCLUDE, REPLACE, RENAME, COLUMNS(...))."""
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        """Parse a GRANT/REVOKE principal: [ROLE|GROUP] <id>."""
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        """Parse the shared prefix of GRANT/REVOKE: privileges, ON <kind>, securable."""
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable
MySQL allows names 8463 # such as "foo.*", "*.*" which are not easily parseable yet 8464 securable = self._try_parse(self._parse_table_parts) 8465 8466 return privileges, kind, securable 8467 8468 def _parse_grant(self) -> exp.Grant | exp.Command: 8469 start = self._prev 8470 8471 privileges, kind, securable = self._parse_grant_revoke_common() 8472 8473 if not securable or not self._match_text_seq("TO"): 8474 return self._parse_as_command(start) 8475 8476 principals = self._parse_csv(self._parse_grant_principal) 8477 8478 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8479 8480 if self._curr: 8481 return self._parse_as_command(start) 8482 8483 return self.expression( 8484 exp.Grant, 8485 privileges=privileges, 8486 kind=kind, 8487 securable=securable, 8488 principals=principals, 8489 grant_option=grant_option, 8490 ) 8491 8492 def _parse_revoke(self) -> exp.Revoke | exp.Command: 8493 start = self._prev 8494 8495 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 8496 8497 privileges, kind, securable = self._parse_grant_revoke_common() 8498 8499 if not securable or not self._match_text_seq("FROM"): 8500 return self._parse_as_command(start) 8501 8502 principals = self._parse_csv(self._parse_grant_principal) 8503 8504 cascade = None 8505 if self._match_texts(("CASCADE", "RESTRICT")): 8506 cascade = self._prev.text.upper() 8507 8508 if self._curr: 8509 return self._parse_as_command(start) 8510 8511 return self.expression( 8512 exp.Revoke, 8513 privileges=privileges, 8514 kind=kind, 8515 securable=securable, 8516 principals=principals, 8517 grant_option=grant_option, 8518 cascade=cascade, 8519 ) 8520 8521 def _parse_overlay(self) -> exp.Overlay: 8522 return self.expression( 8523 exp.Overlay, 8524 **{ # type: ignore 8525 "this": self._parse_bitwise(), 8526 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8527 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8528 "for": self._match_text_seq("FOR") and 
self._parse_bitwise(), 8529 }, 8530 ) 8531 8532 def _parse_format_name(self) -> exp.Property: 8533 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8534 # for FILE_FORMAT = <format_name> 8535 return self.expression( 8536 exp.Property, 8537 this=exp.var("FORMAT_NAME"), 8538 value=self._parse_string() or self._parse_table_parts(), 8539 ) 8540 8541 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8542 args: t.List[exp.Expression] = [] 8543 8544 if self._match(TokenType.DISTINCT): 8545 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8546 self._match(TokenType.COMMA) 8547 8548 args.extend(self._parse_csv(self._parse_assignment)) 8549 8550 return self.expression( 8551 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8552 ) 8553 8554 def _identifier_expression( 8555 self, token: t.Optional[Token] = None, **kwargs: t.Any 8556 ) -> exp.Identifier: 8557 token = token or self._prev 8558 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8559 expression.update_positions(token) 8560 return expression 8561 8562 def _build_pipe_cte( 8563 self, 8564 query: exp.Query, 8565 expressions: t.List[exp.Expression], 8566 alias_cte: t.Optional[exp.TableAlias] = None, 8567 ) -> exp.Select: 8568 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8569 if alias_cte: 8570 new_cte = alias_cte 8571 else: 8572 self._pipe_cte_counter += 1 8573 new_cte = f"__tmp{self._pipe_cte_counter}" 8574 8575 with_ = query.args.get("with") 8576 ctes = with_.pop() if with_ else None 8577 8578 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8579 if ctes: 8580 new_select.set("with", ctes) 8581 8582 return new_select.with_(new_cte, as_=query, copy=False) 8583 8584 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8585 select = self._parse_select(consume_pipe=False) 8586 if not select: 8587 return query 8588 8589 
    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
        """Handle |> LIMIT/OFFSET; keeps the smallest limit and accumulates offsets."""
        limit = self._parse_limit()
        offset = self._parse_offset()
        if limit:
            curr_limit = query.args.get("limit", limit)
            if curr_limit.expression.to_py() >= limit.expression.to_py():
                query.limit(limit, copy=False)
        if offset:
            curr_offset = query.args.get("offset")
            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        """Parse one |> AGGREGATE field, optionally aliased and/or ordered."""
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            # Stop before GROUP AND ORDER BY — it's handled by the caller
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        """Apply parsed aggregate/group fields (and orderings) to `query`."""
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    # Order by the alias, project the aliased expression
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        """Handle |> AGGREGATE ... [GROUP [AND ORDER] BY ...]."""
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle |> UNION/EXCEPT/INTERSECT; returns None if no set op is present."""
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Handle |> JOIN; returns None if no join is present."""
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        """Handle |> PIVOT/UNPIVOT by attaching pivots to the FROM table."""
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        """Handle |> EXTEND by appending projections after a star."""
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        """Handle |> TABLESAMPLE by attaching the sample to the latest CTE or query."""
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        """Parse a chain of |> pipe operators applied to `query`."""
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        """Parse one DECLARE item: <vars, ...> [<type>] [DEFAULT <expr>]."""
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        """Parse DECLARE; falls back to Command when items can't be fully parsed."""
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        """Build a Cast (strict) or TryCast node; TryCast honors the dialect's
        TRY_CAST_REQUIRES_STRING setting."""
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        """Parse JSON_VALUE(<doc>, <path> [RETURNING <type>] [on-condition...])."""
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )
len(node.expressions) > 1: 8802 concat_exprs = [ 8803 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8804 ] 8805 node.set("expressions", concat_exprs) 8806 return node 8807 if len(exprs) == 1: 8808 return exprs[0] 8809 return self.expression(exp.Concat, expressions=args, safe=True) 8810 8811 args = self._parse_csv(self._parse_lambda) 8812 8813 if args: 8814 order = args[-1] if isinstance(args[-1], exp.Order) else None 8815 8816 if order: 8817 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8818 # remove 'expr' from exp.Order and add it back to args 8819 args[-1] = order.this 8820 order.set("this", concat_exprs(order.this, args)) 8821 8822 this = order or concat_exprs(args[0], args) 8823 else: 8824 this = None 8825 8826 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8827 8828 return self.expression(exp.GroupConcat, this=this, separator=separator)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
1578 def __init__( 1579 self, 1580 error_level: t.Optional[ErrorLevel] = None, 1581 error_message_context: int = 100, 1582 max_errors: int = 3, 1583 dialect: DialectType = None, 1584 ): 1585 from sqlglot.dialects import Dialect 1586 1587 self.error_level = error_level or ErrorLevel.IMMEDIATE 1588 self.error_message_context = error_message_context 1589 self.max_errors = max_errors 1590 self.dialect = Dialect.get_or_raise(dialect) 1591 self.reset()
1604 def parse( 1605 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1606 ) -> t.List[t.Optional[exp.Expression]]: 1607 """ 1608 Parses a list of tokens and returns a list of syntax trees, one tree 1609 per parsed SQL statement. 1610 1611 Args: 1612 raw_tokens: The list of tokens. 1613 sql: The original SQL string, used to produce helpful debug messages. 1614 1615 Returns: 1616 The list of the produced syntax trees. 1617 """ 1618 return self._parse( 1619 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1620 )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
1622 def parse_into( 1623 self, 1624 expression_types: exp.IntoType, 1625 raw_tokens: t.List[Token], 1626 sql: t.Optional[str] = None, 1627 ) -> t.List[t.Optional[exp.Expression]]: 1628 """ 1629 Parses a list of tokens into a given Expression type. If a collection of Expression 1630 types is given instead, this method will try to parse the token list into each one 1631 of them, stopping at the first for which the parsing succeeds. 1632 1633 Args: 1634 expression_types: The expression type(s) to try and parse the token list into. 1635 raw_tokens: The list of tokens. 1636 sql: The original SQL string, used to produce helpful debug messages. 1637 1638 Returns: 1639 The target Expression. 1640 """ 1641 errors = [] 1642 for expression_type in ensure_list(expression_types): 1643 parser = self.EXPRESSION_PARSERS.get(expression_type) 1644 if not parser: 1645 raise TypeError(f"No parser registered for {expression_type}") 1646 1647 try: 1648 return self._parse(parser, raw_tokens, sql) 1649 except ParseError as e: 1650 e.errors[0]["into_expression"] = expression_type 1651 errors.append(e) 1652 1653 raise ParseError( 1654 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1655 errors=merge_errors(errors), 1656 ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
1696 def check_errors(self) -> None: 1697 """Logs or raises any found errors, depending on the chosen error level setting.""" 1698 if self.error_level == ErrorLevel.WARN: 1699 for error in self.errors: 1700 logger.error(str(error)) 1701 elif self.error_level == ErrorLevel.RAISE and self.errors: 1702 raise ParseError( 1703 concat_messages(self.errors, self.max_errors), 1704 errors=merge_errors(self.errors), 1705 )
Logs or raises any found errors, depending on the chosen error level setting.
1707 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1708 """ 1709 Appends an error in the list of recorded errors or raises it, depending on the chosen 1710 error level setting. 1711 """ 1712 token = token or self._curr or self._prev or Token.string("") 1713 start = token.start 1714 end = token.end + 1 1715 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1716 highlight = self.sql[start:end] 1717 end_context = self.sql[end : end + self.error_message_context] 1718 1719 error = ParseError.new( 1720 f"{message}. Line {token.line}, Col: {token.col}.\n" 1721 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1722 description=message, 1723 line=token.line, 1724 col=token.col, 1725 start_context=start_context, 1726 highlight=highlight, 1727 end_context=end_context, 1728 ) 1729 1730 if self.error_level == ErrorLevel.IMMEDIATE: 1731 raise error 1732 1733 self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
1735 def expression( 1736 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1737 ) -> E: 1738 """ 1739 Creates a new, validated Expression. 1740 1741 Args: 1742 exp_class: The expression class to instantiate. 1743 comments: An optional list of comments to attach to the expression. 1744 kwargs: The arguments to set for the expression along with their respective values. 1745 1746 Returns: 1747 The target expression. 1748 """ 1749 instance = exp_class(**kwargs) 1750 instance.add_comments(comments) if comments else self._add_comments(instance) 1751 return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
1758 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1759 """ 1760 Validates an Expression, making sure that all its mandatory arguments are set. 1761 1762 Args: 1763 expression: The expression to validate. 1764 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1765 1766 Returns: 1767 The validated expression. 1768 """ 1769 if self.error_level != ErrorLevel.IGNORE: 1770 for error_message in expression.error_messages(args): 1771 self.raise_error(error_message) 1772 1773 return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        """
        Parse a set operation (UNION / EXCEPT / INTERSECT) with `this` as its
        left-hand side. Returns None — with the token position rewound — if
        the upcoming tokens do not form a set operation.
        """
        start = self._index
        # Optional join-style modifiers (side/kind) may precede the operator keyword.
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            # Not a set operation: rewind so the caller can try something else.
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        # An explicit DISTINCT/ALL wins; otherwise fall back to the dialect default,
        # which may be None — meaning the dialect requires an explicit qualifier.
        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        # BY NAME / STRICT CORRESPONDING / CORRESPONDING all enable name-based matching.
        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True

            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        # Parse the right-hand side query; parse_set_operation=False prevents it
        # from greedily consuming further set operators at this level.
        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )